about | summary | refs | log | tree | commit | diff
path: root/libstdc++-v3
diff options
context:
space:
mode:
author Tim Shen <timshen91@gmail.com> 2014-01-16 23:35:21 +0000
committer Tim Shen <timshen@gcc.gnu.org> 2014-01-16 23:35:21 +0000
commit ddf41e9db6f43e3fdb0f2717fb47851a2a64d5bc (patch)
tree d4af0e8eec13c68aaeba7d1ccacd8f8508d5064f /libstdc++-v3
parent 9e6f9ad62c66546ac8b475521e840c9661b6fa78 (diff)
download gcc-ddf41e9db6f43e3fdb0f2717fb47851a2a64d5bc.zip
gcc-ddf41e9db6f43e3fdb0f2717fb47851a2a64d5bc.tar.gz
gcc-ddf41e9db6f43e3fdb0f2717fb47851a2a64d5bc.tar.bz2
regex_automaton.tcc (_StateSeq<>::_M_clone()): Do not use std::map.
2014-01-17 Tim Shen <timshen91@gmail.com>

* include/bits/regex_automaton.tcc (_StateSeq<>::_M_clone()): Do not use std::map.
* include/bits/regex_automaton.h: Do not use std::set.
* include/bits/regex_compiler.h (_BracketMatcher<>::_M_add_char(), _BracketMatcher<>::_M_add_collating_element(), _BracketMatcher<>::_M_add_equivalence_class(), _BracketMatcher<>::_M_make_range()): Likewise.
* include/bits/regex_compiler.tcc (_BracketMatcher<>::_M_apply()): Likewise.
* include/bits/regex_executor.h: Do not use std::queue.
* include/bits/regex_executor.tcc (_Executor<>::_M_main(), _Executor<>::_M_dfs()): Likewise.
* include/std/regex: Remove <map>, <set> and <queue>.

2014-01-17 Tim Shen <timshen91@gmail.com>

* include/bits/regex.h (__compile_nfa<>(), basic_regex<>::basic_regex(), basic_regex<>::assign()): Change __compile_nfa to accept const _CharT* only.
* include/bits/regex_compiler.h: Change _Compiler's template argument from <_FwdIter, _TraitsT> to <_TraitsT>.
* include/bits/regex_compiler.tcc: Likewise.

2014-01-17 Tim Shen <timshen91@gmail.com>

* include/bits/regex_compiler.h: Change _ScannerT into char-type templated.
* include/bits/regex_scanner.h (_Scanner<>::_Scanner()): Separate _ScannerBase from _Scanner; Change _Scanner's template argument from _FwdIter to _CharT. Avoid use of std::map and std::set by using arrays instead.
* include/bits/regex_scanner.tcc (_Scanner<>::_Scanner(), _Scanner<>::_M_scan_normal(), _Scanner<>::_M_eat_escape_ecma(), _Scanner<>::_M_eat_escape_posix(), _Scanner<>::_M_eat_escape_awk()): Likewise.
* include/std/regex: Add <cstring> for using strchr.

2014-01-17 Tim Shen <timshen91@gmail.com>

* bits/regex_automaton.tcc: Indentation fix.
* bits/regex_compiler.h (__compile_nfa<>(), _Compiler<>, _RegexTranslator<> _AnyMatcher<>, _CharMatcher<>, _BracketMatcher<>): Add bool option template parameters and specializations to make matching more efficient and space saving.
* bits/regex_compiler.tcc: Likewise.

From-SVN: r206690
Diffstat (limited to 'libstdc++-v3')
-rw-r--r-- libstdc++-v3/ChangeLog | 48
-rw-r--r-- libstdc++-v3/include/bits/regex.h | 59
-rw-r--r-- libstdc++-v3/include/bits/regex_automaton.h | 7
-rw-r--r-- libstdc++-v3/include/bits/regex_automaton.tcc | 27
-rw-r--r-- libstdc++-v3/include/bits/regex_compiler.h | 308
-rw-r--r-- libstdc++-v3/include/bits/regex_compiler.tcc | 189
-rw-r--r-- libstdc++-v3/include/bits/regex_executor.h | 26
-rw-r--r-- libstdc++-v3/include/bits/regex_executor.tcc | 9
-rw-r--r-- libstdc++-v3/include/bits/regex_scanner.h | 240
-rw-r--r-- libstdc++-v3/include/bits/regex_scanner.tcc | 163
-rw-r--r-- libstdc++-v3/include/std/regex | 4
11 files changed, 593 insertions, 487 deletions
diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog
index 18b44da..73b48b2 100644
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,3 +1,51 @@
+2014-01-17 Tim Shen <timshen91@gmail.com>
+
+ * include/bits/regex_automaton.tcc (_StateSeq<>::_M_clone()): Do not
+ use std::map.
+ * include/bits/regex_automaton.h: Do not use std::set.
+ * include/bits/regex_compiler.h (_BracketMatcher<>::_M_add_char(),
+ _BracketMatcher<>::_M_add_collating_element(),
+ _BracketMatcher<>::_M_add_equivalence_class(),
+ _BracketMatcher<>::_M_make_range()): Likewise.
+ * include/bits/regex_compiler.tcc (_BracketMatcher<>::_M_apply()):
+ Likewise.
+ * include/bits/regex_executor.h: Do not use std::queue.
+ * include/bits/regex_executor.tcc (_Executor<>::_M_main(),
+ _Executor<>::_M_dfs()): Likewise.
+ * include/std/regex: Remove <map>, <set> and <queue>.
+
+2014-01-17 Tim Shen <timshen91@gmail.com>
+
+ * include/bits/regex.h (__compile_nfa<>(), basic_regex<>::basic_regex(),
+ basic_regex<>::assign()): Change __compile_nfa to accept
+ const _CharT* only.
+ * include/bits/regex_compiler.h: Change _Compiler's template
+ argument from <_FwdIter, _TraitsT> to <_TraitsT>.
+ * include/bits/regex_compiler.tcc: Likewise.
+
+2014-01-17 Tim Shen <timshen91@gmail.com>
+
+ * include/bits/regex_compiler.h: Change _ScannerT into char-type
+ templated.
+ * include/bits/regex_scanner.h (_Scanner<>::_Scanner()): Separate
+ _ScannerBase from _Scanner; Change _Scanner's template argument from
+ _FwdIter to _CharT. Avoid use of std::map and std::set by using arrays
+ instead.
+ * include/bits/regex_scanner.tcc (_Scanner<>::_Scanner(),
+ _Scanner<>::_M_scan_normal(), _Scanner<>::_M_eat_escape_ecma(),
+ _Scanner<>::_M_eat_escape_posix(), _Scanner<>::_M_eat_escape_awk()):
+ Likewise.
+ * include/std/regex: Add <cstring> for using strchr.
+
+2014-01-17 Tim Shen <timshen91@gmail.com>
+
+ * bits/regex_automaton.tcc: Indentation fix.
+ * bits/regex_compiler.h (__compile_nfa<>(), _Compiler<>,
+ _RegexTranslator<> _AnyMatcher<>, _CharMatcher<>,
+ _BracketMatcher<>): Add bool option template parameters and
+ specializations to make matching more efficient and space saving.
+ * bits/regex_compiler.tcc: Likewise.
+
2014-01-15 François Dumont <fdumont@gcc.gnu.org>
PR libstdc++/59712
diff --git a/libstdc++-v3/include/bits/regex.h b/libstdc++-v3/include/bits/regex.h
index ae8e1f5..816f5cf 100644
--- a/libstdc++-v3/include/bits/regex.h
+++ b/libstdc++-v3/include/bits/regex.h
@@ -60,51 +60,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
template<typename, typename, typename, bool>
class _Executor;
- template<typename _Tp>
- struct __has_contiguous_iter : std::false_type { };
-
- template<typename _Ch, typename _Tr, typename _Alloc>
- struct __has_contiguous_iter<std::basic_string<_Ch, _Tr, _Alloc>>
- : std::true_type // string<Ch> storage is contiguous
- { };
-
- template<typename _Tp, typename _Alloc>
- struct __has_contiguous_iter<std::vector<_Tp, _Alloc>>
- : std::true_type // vector<Tp> storage is contiguous
- { };
-
- template<typename _Alloc>
- struct __has_contiguous_iter<std::vector<bool, _Alloc>>
- : std::false_type // vector<bool> storage is not contiguous
- { };
-
- template<typename _Tp>
- struct __is_contiguous_normal_iter : std::false_type { };
-
- template<typename _Tp, typename _Cont>
- struct
- __is_contiguous_normal_iter<__gnu_cxx::__normal_iterator<_Tp, _Cont>>
- : __has_contiguous_iter<_Cont>::type
- { };
-
- template<typename _Iter, typename _TraitsT>
- using __enable_if_contiguous_normal_iter
- = typename enable_if< __is_contiguous_normal_iter<_Iter>::value,
- std::shared_ptr<_NFA<_TraitsT>> >::type;
-
- template<typename _Iter, typename _TraitsT>
- using __disable_if_contiguous_normal_iter
- = typename enable_if< !__is_contiguous_normal_iter<_Iter>::value,
- std::shared_ptr<_NFA<_TraitsT>> >::type;
-
- template<typename _FwdIter, typename _TraitsT>
- __disable_if_contiguous_normal_iter<_FwdIter, _TraitsT>
- __compile_nfa(_FwdIter __first, _FwdIter __last, const _TraitsT& __traits,
- regex_constants::syntax_option_type __flags);
-
- template<typename _Iter, typename _TraitsT>
- __enable_if_contiguous_normal_iter<_Iter, _TraitsT>
- __compile_nfa(_Iter __first, _Iter __last, const _TraitsT& __traits,
+ template<typename _TraitsT>
+ inline std::shared_ptr<_NFA<_TraitsT>>
+ __compile_nfa(const typename _TraitsT::char_type* __first,
+ const typename _TraitsT::char_type* __last,
+ const _TraitsT& __traits,
regex_constants::syntax_option_type __flags);
_GLIBCXX_END_NAMESPACE_VERSION
@@ -561,7 +521,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
flag_type __f = ECMAScript)
: _M_flags(__f),
_M_original_str(__first, __last),
- _M_automaton(__detail::__compile_nfa(__first, __last, _M_traits,
+ _M_automaton(__detail::__compile_nfa(_M_original_str.c_str(),
+ _M_original_str.c_str()
+ + _M_original_str.size(),
+ _M_traits,
_M_flags))
{ }
@@ -698,7 +661,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{
_M_flags = __flags;
_M_original_str.assign(__s.begin(), __s.end());
- _M_automaton = __detail::__compile_nfa(__s.begin(), __s.end(),
+ auto __p = _M_original_str.c_str();
+ _M_automaton = __detail::__compile_nfa(__p,
+ __p + _M_original_str.size(),
_M_traits, _M_flags);
return *this;
}
diff --git a/libstdc++-v3/include/bits/regex_automaton.h b/libstdc++-v3/include/bits/regex_automaton.h
index ea73675..a442cfe 100644
--- a/libstdc++-v3/include/bits/regex_automaton.h
+++ b/libstdc++-v3/include/bits/regex_automaton.h
@@ -41,7 +41,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
*/
typedef long _StateIdT;
- typedef std::set<_StateIdT> _StateSet;
static const _StateIdT _S_invalid_state_id = -1;
template<typename _CharT>
@@ -138,16 +137,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_start() const
{ return _M_start_state; }
- const _StateSet&
- _M_final_states() const
- { return _M_accepting_states; }
-
_SizeT
_M_sub_count() const
{ return _M_subexpr_count; }
std::vector<size_t> _M_paren_stack;
- _StateSet _M_accepting_states;
_FlagT _M_flags;
_StateIdT _M_start_state;
_SizeT _M_subexpr_count;
@@ -172,7 +166,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_insert_accept()
{
auto __ret = _M_insert_state(_StateT(_S_opcode_accept));
- this->_M_accepting_states.insert(__ret);
return __ret;
}
diff --git a/libstdc++-v3/include/bits/regex_automaton.tcc b/libstdc++-v3/include/bits/regex_automaton.tcc
index 7edc67f..759b053 100644
--- a/libstdc++-v3/include/bits/regex_automaton.tcc
+++ b/libstdc++-v3/include/bits/regex_automaton.tcc
@@ -134,9 +134,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_NFA<_TraitsT>::_M_dot(std::ostream& __ostr) const
{
__ostr << "digraph _Nfa {\n"
- " rankdir=LR;\n";
+ " rankdir=LR;\n";
for (size_t __i = 0; __i < this->size(); ++__i)
- (*this)[__i]._M_dot(__ostr, __i);
+ (*this)[__i]._M_dot(__ostr, __i);
__ostr << "}\n";
return __ostr;
}
@@ -186,7 +186,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_StateSeq<_TraitsT>
_StateSeq<_TraitsT>::_M_clone()
{
- std::map<_StateIdT, _StateIdT> __m;
+ std::vector<_StateIdT> __m(_M_nfa.size(), -1);
std::stack<_StateIdT> __stack;
__stack.push(_M_start);
while (!__stack.empty())
@@ -194,30 +194,35 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
auto __u = __stack.top();
__stack.pop();
auto __dup = _M_nfa[__u];
+ // _M_insert_state() never return -1
auto __id = _M_nfa._M_insert_state(__dup);
__m[__u] = __id;
if (__u == _M_end)
continue;
- if (__m.count(__dup._M_next) == 0)
+ if (__dup._M_next != _S_invalid_state_id && __m[__dup._M_next] == -1)
__stack.push(__dup._M_next);
if (__dup._M_opcode == _S_opcode_alternative
|| __dup._M_opcode == _S_opcode_subexpr_lookahead)
- if (__m.count(__dup._M_alt) == 0)
+ if (__dup._M_alt != _S_invalid_state_id && __m[__dup._M_alt] == -1)
__stack.push(__dup._M_alt);
}
- for (auto __it : __m)
+ long __size = static_cast<long>(__m.size());
+ for (long __k = 0; __k < __size; __k++)
{
- auto& __ref = _M_nfa[__it.second];
- if (__ref._M_next != -1)
+ long __v;
+ if ((__v = __m[__k]) == -1)
+ continue;
+ auto& __ref = _M_nfa[__v];
+ if (__ref._M_next != _S_invalid_state_id)
{
- _GLIBCXX_DEBUG_ASSERT(__m.count(__ref._M_next));
+ _GLIBCXX_DEBUG_ASSERT(__m[__ref._M_next] != -1);
__ref._M_next = __m[__ref._M_next];
}
if (__ref._M_opcode == _S_opcode_alternative
|| __ref._M_opcode == _S_opcode_subexpr_lookahead)
- if (__ref._M_alt != -1)
+ if (__ref._M_alt != _S_invalid_state_id)
{
- _GLIBCXX_DEBUG_ASSERT(__m.count(__ref._M_alt));
+ _GLIBCXX_DEBUG_ASSERT(__m[__ref._M_alt] != -1);
__ref._M_alt = __m[__ref._M_alt];
}
}
diff --git a/libstdc++-v3/include/bits/regex_compiler.h b/libstdc++-v3/include/bits/regex_compiler.h
index 4ac67df..216f8fb 100644
--- a/libstdc++-v3/include/bits/regex_compiler.h
+++ b/libstdc++-v3/include/bits/regex_compiler.h
@@ -39,19 +39,20 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
* @{
*/
- template<typename _TraitsT>
+ template<typename, bool, bool>
struct _BracketMatcher;
/// Builds an NFA from an input iterator interval.
- template<typename _FwdIter, typename _TraitsT>
+ template<typename _TraitsT>
class _Compiler
{
public:
- typedef typename _TraitsT::string_type _StringT;
+ typedef typename _TraitsT::char_type _CharT;
+ typedef const _CharT* _IterT;
typedef _NFA<_TraitsT> _RegexT;
typedef regex_constants::syntax_option_type _FlagT;
- _Compiler(_FwdIter __b, _FwdIter __e,
+ _Compiler(_IterT __b, _IterT __e,
const _TraitsT& __traits, _FlagT __flags);
std::shared_ptr<_RegexT>
@@ -59,12 +60,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{ return make_shared<_RegexT>(std::move(_M_nfa)); }
private:
- typedef _Scanner<_FwdIter> _ScannerT;
- typedef typename _ScannerT::_TokenT _TokenT;
- typedef _StateSeq<_TraitsT> _StateSeqT;
- typedef std::stack<_StateSeqT, std::vector<_StateSeqT>> _StackT;
- typedef _BracketMatcher<_TraitsT> _BMatcherT;
- typedef std::ctype<typename _TraitsT::char_type> _CtypeT;
+ typedef _Scanner<_CharT> _ScannerT;
+ typedef typename _TraitsT::string_type _StringT;
+ typedef typename _ScannerT::_TokenT _TokenT;
+ typedef _StateSeq<_TraitsT> _StateSeqT;
+ typedef std::stack<_StateSeqT> _StackT;
+ typedef std::ctype<_CharT> _CtypeT;
// accepts a specific token or returns false.
bool
@@ -91,20 +92,30 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
bool
_M_bracket_expression();
- void
- _M_expression_term(_BMatcherT& __matcher);
+ template<bool __icase, bool __collate>
+ void
+ _M_insert_any_matcher_ecma();
- bool
- _M_range_expression(_BMatcherT& __matcher);
+ template<bool __icase, bool __collate>
+ void
+ _M_insert_any_matcher_posix();
- bool
- _M_collating_symbol(_BMatcherT& __matcher);
+ template<bool __icase, bool __collate>
+ void
+ _M_insert_char_matcher();
- bool
- _M_equivalence_class(_BMatcherT& __matcher);
+ template<bool __icase, bool __collate>
+ void
+ _M_insert_character_class_matcher();
- bool
- _M_character_class(_BMatcherT& __matcher);
+ template<bool __icase, bool __collate>
+ void
+ _M_insert_bracket_matcher(bool __neg);
+
+ template<bool __icase, bool __collate>
+ void
+ _M_expression_term(_BracketMatcher<_TraitsT, __icase, __collate>&
+ __matcher);
int
_M_cur_int_value(int __radix);
@@ -129,33 +140,119 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_StackT _M_stack;
};
- template<typename _FwdIter, typename _TraitsT>
- inline __disable_if_contiguous_normal_iter<_FwdIter, _TraitsT>
- __compile_nfa(_FwdIter __first, _FwdIter __last, const _TraitsT& __traits,
+ template<typename _TraitsT>
+ inline std::shared_ptr<_NFA<_TraitsT>>
+ __compile_nfa(const typename _TraitsT::char_type* __first,
+ const typename _TraitsT::char_type* __last,
+ const _TraitsT& __traits,
regex_constants::syntax_option_type __flags)
{
- using _Cmplr = _Compiler<_FwdIter, _TraitsT>;
+ using _Cmplr = _Compiler<_TraitsT>;
return _Cmplr(__first, __last, __traits, __flags)._M_get_nfa();
}
- template<typename _Iter, typename _TraitsT>
- inline __enable_if_contiguous_normal_iter<_Iter, _TraitsT>
- __compile_nfa(_Iter __first, _Iter __last, const _TraitsT& __traits,
- regex_constants::syntax_option_type __flags)
+ // [28.13.14]
+ template<typename _TraitsT, bool __icase, bool __collate>
+ class _RegexTranslator
{
- size_t __len = __last - __first;
- const auto* __cfirst = __len ? std::__addressof(*__first) : nullptr;
- return __compile_nfa(__cfirst, __cfirst + __len, __traits, __flags);
- }
+ public:
+ typedef typename _TraitsT::char_type _CharT;
+ typedef typename _TraitsT::string_type _StringT;
+ typedef typename std::conditional<__collate,
+ _StringT,
+ _CharT>::type _StrTransT;
+
+ explicit
+ _RegexTranslator(const _TraitsT& __traits)
+ : _M_traits(__traits)
+ { }
+
+ _CharT
+ _M_translate(_CharT __ch) const
+ {
+ if (__icase)
+ return _M_traits.translate_nocase(__ch);
+ else if (__collate)
+ return _M_traits.translate(__ch);
+ else
+ return __ch;
+ }
+
+ _StrTransT
+ _M_transform(_CharT __ch) const
+ {
+ return _M_transform_impl(__ch, typename integral_constant<bool,
+ __collate>::type());
+ }
+
+ private:
+ _StrTransT
+ _M_transform_impl(_CharT __ch, false_type) const
+ { return __ch; }
+
+ _StrTransT
+ _M_transform_impl(_CharT __ch, true_type) const
+ {
+ _StrTransT __str = _StrTransT(1, _M_translate(__ch));
+ return _M_traits.transform(__str.begin(), __str.end());
+ }
- template<typename _TraitsT, bool __is_ecma>
- struct _AnyMatcher
+ const _TraitsT& _M_traits;
+ };
+
+ template<typename _TraitsT>
+ class _RegexTranslator<_TraitsT, false, false>
{
- typedef typename _TraitsT::char_type _CharT;
+ public:
+ typedef typename _TraitsT::char_type _CharT;
+ typedef _CharT _StrTransT;
+
+ explicit
+ _RegexTranslator(const _TraitsT& __traits)
+ { }
+
+ _CharT
+ _M_translate(_CharT __ch) const
+ { return __ch; }
+
+ _StrTransT
+ _M_transform(_CharT __ch) const
+ { return __ch; }
+ };
+
+ template<typename _TraitsT, bool __is_ecma, bool __icase, bool __collate>
+ struct _AnyMatcher;
+
+ template<typename _TraitsT, bool __icase, bool __collate>
+ struct _AnyMatcher<_TraitsT, false, __icase, __collate>
+ {
+ typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
+ typedef typename _TransT::_CharT _CharT;
explicit
_AnyMatcher(const _TraitsT& __traits)
- : _M_traits(__traits)
+ : _M_translator(__traits)
+ { }
+
+ bool
+ operator()(_CharT __ch) const
+ {
+ static auto __nul = _M_translator._M_translate('\0');
+ return _M_translator._M_translate(__ch) != __nul;
+ }
+
+ _TransT _M_translator;
+ };
+
+ template<typename _TraitsT, bool __icase, bool __collate>
+ struct _AnyMatcher<_TraitsT, true, __icase, __collate>
+ {
+ typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
+ typedef typename _TransT::_CharT _CharT;
+
+ explicit
+ _AnyMatcher(const _TraitsT& __traits)
+ : _M_translator(__traits)
{ }
bool
@@ -165,92 +262,63 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
bool
_M_apply(_CharT __ch, true_type) const
{
- auto __c = _M_traits.translate(__ch);
- if (__is_ecma)
- {
- static auto __n = _M_traits.translate('\n');
- static auto __r = _M_traits.translate('\r');
- return __c != __n && __c != __r;
- }
- else
- {
- static auto __nul = _M_traits.translate('\0');
- return __c != __nul;
- }
+ auto __c = _M_translator._M_translate(__ch);
+ auto __n = _M_translator._M_translate('\n');
+ auto __r = _M_translator._M_translate('\r');
+ return __c != __n && __c != __r;
}
bool
_M_apply(_CharT __ch, false_type) const
{
- auto __c = _M_traits.translate(__ch);
- if (__is_ecma)
- {
- static auto __n = _M_traits.translate('\n');
- static auto __r = _M_traits.translate('\r');
- static auto __u2028 = _M_traits.translate(u'\u2028');
- static auto __u2029 = _M_traits.translate(u'\u2029');
- return __c != __n && __c != __r && __c != __u2028
- && __c != __u2029;
- }
- else
- {
- static auto __nul = _M_traits.translate('\0');
- return __c != __nul;
- }
+ auto __c = _M_translator._M_translate(__ch);
+ auto __n = _M_translator._M_translate('\n');
+ auto __r = _M_translator._M_translate('\r');
+ auto __u2028 = _M_translator._M_translate(u'\u2028');
+ auto __u2029 = _M_translator._M_translate(u'\u2029');
+ return __c != __n && __c != __r && __c != __u2028 && __c != __u2029;
}
- const _TraitsT& _M_traits;
+ _TransT _M_translator;
};
- template<typename _TraitsT, bool __icase>
+ template<typename _TraitsT, bool __icase, bool __collate>
struct _CharMatcher
{
- typedef typename _TraitsT::char_type _CharT;
+ typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
+ typedef typename _TransT::_CharT _CharT;
_CharMatcher(_CharT __ch, const _TraitsT& __traits)
- : _M_traits(__traits), _M_ch(_M_translate(__ch))
+ : _M_translator(__traits), _M_ch(_M_translator._M_translate(__ch))
{ }
bool
operator()(_CharT __ch) const
- { return _M_ch == _M_translate(__ch); }
+ { return _M_ch == _M_translator._M_translate(__ch); }
- _CharT
- _M_translate(_CharT __ch) const
- {
- if (__icase)
- return _M_traits.translate_nocase(__ch);
- else
- return _M_traits.translate(__ch);
- }
-
- const _TraitsT& _M_traits;
- _CharT _M_ch;
+ _TransT _M_translator;
+ _CharT _M_ch;
};
/// Matches a character range (bracket expression)
- // TODO: Convert used _M_flags fields to template parameters, including
- // collate and icase. Avoid using std::set, could use flat_set
- // (sorted vector and binary search) instead.
- template<typename _TraitsT>
+ template<typename _TraitsT, bool __icase, bool __collate>
struct _BracketMatcher
{
public:
- typedef typename _TraitsT::char_type _CharT;
- typedef typename _TraitsT::char_class_type _CharClassT;
- typedef typename _TraitsT::string_type _StringT;
- typedef regex_constants::syntax_option_type _FlagT;
+ typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
+ typedef typename _TransT::_CharT _CharT;
+ typedef typename _TransT::_StrTransT _StrTransT;
+ typedef typename _TraitsT::string_type _StringT;
+ typedef typename _TraitsT::char_class_type _CharClassT;
public:
_BracketMatcher(bool __is_non_matching,
- const _TraitsT& __traits,
- _FlagT __flags)
- :
+ const _TraitsT& __traits)
+ : _M_class_set(0), _M_translator(__traits), _M_traits(__traits),
+ _M_is_non_matching(__is_non_matching)
#ifdef _GLIBCXX_DEBUG
- _M_is_ready(false),
+ , _M_is_ready(false)
#endif
- _M_traits(__traits), _M_class_set(0), _M_flags(__flags),
- _M_is_non_matching(__is_non_matching)
{ }
bool
@@ -263,7 +331,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
void
_M_add_char(_CharT __c)
{
- _M_char_set.insert(_M_translate(__c));
+ _M_char_set.push_back(_M_translator._M_translate(__c));
#ifdef _GLIBCXX_DEBUG
_M_is_ready = false;
#endif
@@ -276,7 +344,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__s.data() + __s.size());
if (__st.empty())
__throw_regex_error(regex_constants::error_collate);
- _M_char_set.insert(_M_translate(__st[0]));
+ _M_char_set.push_back(_M_translator._M_translate(__st[0]));
#ifdef _GLIBCXX_DEBUG
_M_is_ready = false;
#endif
@@ -291,7 +359,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__throw_regex_error(regex_constants::error_collate);
__st = _M_traits.transform_primary(__st.data(),
__st.data() + __st.size());
- _M_equiv_set.insert(__st);
+ _M_equiv_set.push_back(__st);
#ifdef _GLIBCXX_DEBUG
_M_is_ready = false;
#endif
@@ -302,7 +370,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{
auto __mask = _M_traits.lookup_classname(__s.data(),
__s.data() + __s.size(),
- _M_is_icase());
+ __icase);
if (__mask == 0)
__throw_regex_error(regex_constants::error_ctype);
_M_class_set |= __mask;
@@ -314,12 +382,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
void
_M_make_range(_CharT __l, _CharT __r)
{
- if (_M_flags & regex_constants::collate)
- _M_range_set.insert(
- make_pair(_M_get_str(_M_translate(__l)),
- _M_get_str(_M_translate(__r))));
- else
- _M_range_set.insert(make_pair(_M_get_str(__l), _M_get_str(__r)));
+ _M_range_set.push_back(make_pair(_M_translator._M_transform(__l),
+ _M_translator._M_transform(__r)));
#ifdef _GLIBCXX_DEBUG
_M_is_ready = false;
#endif
@@ -350,26 +414,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_apply(_CharT __ch, true_type) const
{ return _M_cache[static_cast<_UnsignedCharT>(__ch)]; }
- _CharT
- _M_translate(_CharT __c) const
- {
- if (_M_is_icase())
- return _M_traits.translate_nocase(__c);
- else
- return _M_traits.translate(__c);
- }
-
- bool
- _M_is_icase() const
- { return _M_flags & regex_constants::icase; }
-
- _StringT
- _M_get_str(_CharT __c) const
- {
- _StringT __s(1, __c);
- return _M_traits.transform(__s.begin(), __s.end());
- }
-
void
_M_make_cache(true_type)
{
@@ -383,16 +427,16 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{ }
private:
- _CacheT _M_cache;
- std::set<_CharT> _M_char_set;
- std::set<_StringT> _M_equiv_set;
- std::set<pair<_StringT, _StringT>> _M_range_set;
- const _TraitsT& _M_traits;
- _CharClassT _M_class_set;
- _FlagT _M_flags;
- bool _M_is_non_matching;
+ _CacheT _M_cache;
+ std::vector<_CharT> _M_char_set;
+ std::vector<_StringT> _M_equiv_set;
+ std::vector<pair<_StrTransT, _StrTransT>> _M_range_set;
+ _CharClassT _M_class_set;
+ _TransT _M_translator;
+ const _TraitsT& _M_traits;
+ bool _M_is_non_matching;
#ifdef _GLIBCXX_DEBUG
- bool _M_is_ready;
+ bool _M_is_ready;
#endif
};
diff --git a/libstdc++-v3/include/bits/regex_compiler.tcc b/libstdc++-v3/include/bits/regex_compiler.tcc
index 4da653f..621e43f 100644
--- a/libstdc++-v3/include/bits/regex_compiler.tcc
+++ b/libstdc++-v3/include/bits/regex_compiler.tcc
@@ -59,9 +59,9 @@ namespace __detail
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION
- template<typename _FwdIter, typename _TraitsT>
- _Compiler<_FwdIter, _TraitsT>::
- _Compiler(_FwdIter __b, _FwdIter __e,
+ template<typename _TraitsT>
+ _Compiler<_TraitsT>::
+ _Compiler(_IterT __b, _IterT __e,
const _TraitsT& __traits, _FlagT __flags)
: _M_flags((__flags
& (regex_constants::ECMAScript
@@ -89,9 +89,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_nfa._M_eliminate_dummy();
}
- template<typename _FwdIter, typename _TraitsT>
+ template<typename _TraitsT>
void
- _Compiler<_FwdIter, _TraitsT>::
+ _Compiler<_TraitsT>::
_M_disjunction()
{
this->_M_alternative();
@@ -110,9 +110,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
}
}
- template<typename _FwdIter, typename _TraitsT>
+ template<typename _TraitsT>
void
- _Compiler<_FwdIter, _TraitsT>::
+ _Compiler<_TraitsT>::
_M_alternative()
{
if (this->_M_term())
@@ -126,9 +126,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_stack.push(_StateSeqT(_M_nfa, _M_nfa._M_insert_dummy()));
}
- template<typename _FwdIter, typename _TraitsT>
+ template<typename _TraitsT>
bool
- _Compiler<_FwdIter, _TraitsT>::
+ _Compiler<_TraitsT>::
_M_term()
{
if (this->_M_assertion())
@@ -141,9 +141,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return false;
}
- template<typename _FwdIter, typename _TraitsT>
+ template<typename _TraitsT>
bool
- _Compiler<_FwdIter, _TraitsT>::
+ _Compiler<_TraitsT>::
_M_assertion()
{
if (_M_match_token(_ScannerT::_S_token_line_begin))
@@ -172,9 +172,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return true;
}
- template<typename _FwdIter, typename _TraitsT>
+ template<typename _TraitsT>
void
- _Compiler<_FwdIter, _TraitsT>::
+ _Compiler<_TraitsT>::
_M_quantifier()
{
bool __neg = (_M_flags & regex_constants::ECMAScript);
@@ -278,52 +278,39 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
}
}
- template<typename _FwdIter, typename _TraitsT>
+#define __INSERT_REGEX_MATCHER(__func, args...)\
+ do\
+ if (!(_M_flags & regex_constants::icase))\
+ if (!(_M_flags & regex_constants::collate))\
+ __func<false, false>(args);\
+ else\
+ __func<false, true>(args);\
+ else\
+ if (!(_M_flags & regex_constants::collate))\
+ __func<true, false>(args);\
+ else\
+ __func<true, true>(args);\
+ while (false)
+
+ template<typename _TraitsT>
bool
- _Compiler<_FwdIter, _TraitsT>::
+ _Compiler<_TraitsT>::
_M_atom()
{
if (_M_match_token(_ScannerT::_S_token_anychar))
{
- if (_M_flags & regex_constants::ECMAScript)
- _M_stack.push(_StateSeqT(_M_nfa,
- _M_nfa._M_insert_matcher
- (_AnyMatcher<_TraitsT,
- true>(_M_traits))));
+ if (!(_M_flags & regex_constants::ECMAScript))
+ __INSERT_REGEX_MATCHER(_M_insert_any_matcher_posix);
else
- _M_stack.push(_StateSeqT(_M_nfa,
- _M_nfa._M_insert_matcher
- (_AnyMatcher<_TraitsT,
- false>(_M_traits))));
+ __INSERT_REGEX_MATCHER(_M_insert_any_matcher_ecma);
}
else if (_M_try_char())
- {
- if (_M_flags & regex_constants::icase)
- _M_stack.push(_StateSeqT(_M_nfa,
- _M_nfa._M_insert_matcher
- (_CharMatcher<_TraitsT,
- true>(_M_value[0],
- _M_traits))));
- else
- _M_stack.push(_StateSeqT(_M_nfa,
- _M_nfa._M_insert_matcher
- (_CharMatcher<_TraitsT,
- false>(_M_value[0],
- _M_traits))));
- }
+ __INSERT_REGEX_MATCHER(_M_insert_char_matcher);
else if (_M_match_token(_ScannerT::_S_token_backref))
_M_stack.push(_StateSeqT(_M_nfa, _M_nfa.
_M_insert_backref(_M_cur_int_value(10))));
else if (_M_match_token(_ScannerT::_S_token_quoted_class))
- {
- _GLIBCXX_DEBUG_ASSERT(_M_value.size() == 1);
- _BMatcherT __matcher(_M_ctype.is(_CtypeT::upper, _M_value[0]),
- _M_traits, _M_flags);
- __matcher._M_add_character_class(_M_value);
- __matcher._M_ready();
- _M_stack.push(_StateSeqT(_M_nfa,
- _M_nfa._M_insert_matcher(std::move(__matcher))));
- }
+ __INSERT_REGEX_MATCHER(_M_insert_character_class_matcher);
else if (_M_match_token(_ScannerT::_S_token_subexpr_no_group_begin))
{
_StateSeqT __r(_M_nfa, _M_nfa._M_insert_dummy());
@@ -348,28 +335,90 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return true;
}
- template<typename _FwdIter, typename _TraitsT>
+ template<typename _TraitsT>
bool
- _Compiler<_FwdIter, _TraitsT>::
+ _Compiler<_TraitsT>::
_M_bracket_expression()
{
bool __neg =
_M_match_token(_ScannerT::_S_token_bracket_neg_begin);
if (!(__neg || _M_match_token(_ScannerT::_S_token_bracket_begin)))
return false;
- _BMatcherT __matcher(__neg, _M_traits, _M_flags);
+ __INSERT_REGEX_MATCHER(_M_insert_bracket_matcher, __neg);
+ return true;
+ }
+#undef __INSERT_REGEX_MATCHER
+
+ template<typename _TraitsT>
+ template<bool __icase, bool __collate>
+ void
+ _Compiler<_TraitsT>::
+ _M_insert_any_matcher_ecma()
+ {
+ _M_stack.push(_StateSeqT(_M_nfa,
+ _M_nfa._M_insert_matcher
+ (_AnyMatcher<_TraitsT, true, __icase, __collate>
+ (_M_traits))));
+ }
+
+ template<typename _TraitsT>
+ template<bool __icase, bool __collate>
+ void
+ _Compiler<_TraitsT>::
+ _M_insert_any_matcher_posix()
+ {
+ _M_stack.push(_StateSeqT(_M_nfa,
+ _M_nfa._M_insert_matcher
+ (_AnyMatcher<_TraitsT, false, __icase, __collate>
+ (_M_traits))));
+ }
+
+ template<typename _TraitsT>
+ template<bool __icase, bool __collate>
+ void
+ _Compiler<_TraitsT>::
+ _M_insert_char_matcher()
+ {
+ _M_stack.push(_StateSeqT(_M_nfa,
+ _M_nfa._M_insert_matcher
+ (_CharMatcher<_TraitsT, __icase, __collate>
+ (_M_value[0], _M_traits))));
+ }
+
+ template<typename _TraitsT>
+ template<bool __icase, bool __collate>
+ void
+ _Compiler<_TraitsT>::
+ _M_insert_character_class_matcher()
+ {
+ _GLIBCXX_DEBUG_ASSERT(_M_value.size() == 1);
+ _BracketMatcher<_TraitsT, __icase, __collate> __matcher
+ (_M_ctype.is(_CtypeT::upper, _M_value[0]), _M_traits);
+ __matcher._M_add_character_class(_M_value);
+ __matcher._M_ready();
+ _M_stack.push(_StateSeqT(_M_nfa,
+ _M_nfa._M_insert_matcher(std::move(__matcher))));
+ }
+
+ template<typename _TraitsT>
+ template<bool __icase, bool __collate>
+ void
+ _Compiler<_TraitsT>::
+ _M_insert_bracket_matcher(bool __neg)
+ {
+ _BracketMatcher<_TraitsT, __icase, __collate> __matcher(__neg, _M_traits);
while (!_M_match_token(_ScannerT::_S_token_bracket_end))
_M_expression_term(__matcher);
__matcher._M_ready();
_M_stack.push(_StateSeqT(_M_nfa,
_M_nfa._M_insert_matcher(std::move(__matcher))));
- return true;
}
- template<typename _FwdIter, typename _TraitsT>
+ template<typename _TraitsT>
+ template<bool __icase, bool __collate>
void
- _Compiler<_FwdIter, _TraitsT>::
- _M_expression_term(_BMatcherT& __matcher)
+ _Compiler<_TraitsT>::
+ _M_expression_term(_BracketMatcher<_TraitsT, __icase, __collate>& __matcher)
{
if (_M_match_token(_ScannerT::_S_token_collsymbol))
__matcher._M_add_collating_element(_M_value);
@@ -403,9 +452,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__throw_regex_error(regex_constants::error_brack);
}
- template<typename _FwdIter, typename _TraitsT>
+ template<typename _TraitsT>
bool
- _Compiler<_FwdIter, _TraitsT>::
+ _Compiler<_TraitsT>::
_M_try_char()
{
bool __is_char = false;
@@ -424,9 +473,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return __is_char;
}
- template<typename _FwdIter, typename _TraitsT>
+ template<typename _TraitsT>
bool
- _Compiler<_FwdIter, _TraitsT>::
+ _Compiler<_TraitsT>::
_M_match_token(_TokenT token)
{
if (token == _M_scanner._M_get_token())
@@ -438,9 +487,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return false;
}
- template<typename _FwdIter, typename _TraitsT>
+ template<typename _TraitsT>
int
- _Compiler<_FwdIter, _TraitsT>::
+ _Compiler<_TraitsT>::
_M_cur_int_value(int __radix)
{
long __v = 0;
@@ -450,25 +499,31 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return __v;
}
- template<typename _TraitsT>
+ template<typename _TraitsT, bool __icase, bool __collate>
bool
- _BracketMatcher<_TraitsT>::_M_apply(_CharT __ch, false_type) const
+ _BracketMatcher<_TraitsT, __icase, __collate>::
+ _M_apply(_CharT __ch, false_type) const
{
bool __ret = false;
- if (_M_traits.isctype(__ch, _M_class_set)
- || _M_char_set.count(_M_translate(__ch))
- || _M_equiv_set.count(_M_traits.transform_primary(&__ch, &__ch+1)))
+ if (std::find(_M_char_set.begin(), _M_char_set.end(),
+ _M_translator._M_translate(__ch))
+ != _M_char_set.end())
__ret = true;
else
{
- _StringT __s = _M_get_str(_M_flags & regex_constants::collate
- ? _M_translate(__ch) : __ch);
+ auto __s = _M_translator._M_transform(__ch);
for (auto& __it : _M_range_set)
if (__it.first <= __s && __s <= __it.second)
{
__ret = true;
break;
}
+ if (_M_traits.isctype(__ch, _M_class_set))
+ __ret = true;
+ else if (std::find(_M_equiv_set.begin(), _M_equiv_set.end(),
+ _M_traits.transform_primary(&__ch, &__ch+1))
+ != _M_equiv_set.end())
+ __ret = true;
}
if (_M_is_non_matching)
return !__ret;
diff --git a/libstdc++-v3/include/bits/regex_executor.h b/libstdc++-v3/include/bits/regex_executor.h
index bed9014..0885716 100644
--- a/libstdc++-v3/include/bits/regex_executor.h
+++ b/libstdc++-v3/include/bits/regex_executor.h
@@ -65,7 +65,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_nfa(*__re._M_automaton),
_M_results(__results),
_M_match_queue(__dfs_mode ? nullptr
- : new queue<pair<_StateIdT, _ResultsVec>>()),
+ : new vector<pair<_StateIdT, _ResultsVec>>()),
_M_visited(__dfs_mode ? nullptr : new vector<bool>(_M_nfa.size())),
_M_flags((__flags & regex_constants::match_prev_avail)
? (__flags
@@ -133,23 +133,23 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_lookahead(_State<_TraitsT> __state);
public:
- _ResultsVec _M_cur_results;
- _BiIter _M_current;
- const _BiIter _M_begin;
- const _BiIter _M_end;
- const _RegexT& _M_re;
- const _NFAT& _M_nfa;
- _ResultsVec& _M_results;
+ _ResultsVec _M_cur_results;
+ _BiIter _M_current;
+ const _BiIter _M_begin;
+ const _BiIter _M_end;
+ const _RegexT& _M_re;
+ const _NFAT& _M_nfa;
+ _ResultsVec& _M_results;
// Used in BFS, saving states that need to be considered for the next
// character.
- std::unique_ptr<queue<pair<_StateIdT, _ResultsVec>>> _M_match_queue;
+ std::unique_ptr<vector<pair<_StateIdT, _ResultsVec>>> _M_match_queue;
// Used in BFS, indicating which states have already been visited.
- std::unique_ptr<vector<bool>> _M_visited;
- _FlagT _M_flags;
+ std::unique_ptr<vector<bool>> _M_visited;
+ _FlagT _M_flags;
// To record current solution.
- _StateIdT _M_start_state;
+ _StateIdT _M_start_state;
// Do we have a solution so far?
- bool _M_has_sol;
+ bool _M_has_sol;
};
//@} regex-detail
diff --git a/libstdc++-v3/include/bits/regex_executor.tcc b/libstdc++-v3/include/bits/regex_executor.tcc
index 85b70b8..e1cfcb0 100644
--- a/libstdc++-v3/include/bits/regex_executor.tcc
+++ b/libstdc++-v3/include/bits/regex_executor.tcc
@@ -111,7 +111,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
}
else
{
- _M_match_queue->push(make_pair(_M_start_state, _M_results));
+ _M_match_queue->push_back(make_pair(_M_start_state, _M_results));
bool __ret = false;
while (1)
{
@@ -120,10 +120,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
break;
_M_visited->assign(_M_visited->size(), false);
auto _M_old_queue = std::move(*_M_match_queue);
- while (!_M_old_queue.empty())
+ for (auto __task : _M_old_queue)
{
- auto __task = _M_old_queue.front();
- _M_old_queue.pop();
_M_cur_results = __task.second;
_M_dfs<__match_mode>(__task.first);
}
@@ -279,7 +277,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
}
else
if (__state._M_matches(*_M_current))
- _M_match_queue->push(make_pair(__state._M_next, _M_cur_results));
+ _M_match_queue->push_back(make_pair(__state._M_next,
+ _M_cur_results));
break;
// First fetch the matched result from _M_cur_results as __submatch;
// then compare it with
diff --git a/libstdc++-v3/include/bits/regex_scanner.h b/libstdc++-v3/include/bits/regex_scanner.h
index d113c5d..6dc2b4e 100644
--- a/libstdc++-v3/include/bits/regex_scanner.h
+++ b/libstdc++-v3/include/bits/regex_scanner.h
@@ -39,6 +39,154 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
* @{
*/
+ struct _ScannerBase // Scanner state and lookup tables that use plain char only; _Scanner<_CharT> derives from this.
+ {
+ public:
+ /// Token types returned from the scanner.
+ enum _TokenT
+ {
+ _S_token_anychar,
+ _S_token_ord_char,
+ _S_token_oct_num,
+ _S_token_hex_num,
+ _S_token_backref,
+ _S_token_subexpr_begin,
+ _S_token_subexpr_no_group_begin,
+ _S_token_subexpr_lookahead_begin, // neg if _M_value[0] == 'n'
+ _S_token_subexpr_end,
+ _S_token_bracket_begin,
+ _S_token_bracket_neg_begin,
+ _S_token_bracket_end,
+ _S_token_interval_begin,
+ _S_token_interval_end,
+ _S_token_quoted_class,
+ _S_token_char_class_name,
+ _S_token_collsymbol,
+ _S_token_equiv_class_name,
+ _S_token_opt,
+ _S_token_or,
+ _S_token_closure0,
+ _S_token_closure1,
+ _S_token_ungreedy,
+ _S_token_line_begin,
+ _S_token_line_end,
+ _S_token_word_bound, // neg if _M_value[0] == 'n'
+ _S_token_comma,
+ _S_token_dup_count,
+ _S_token_eof,
+ _S_token_unknown
+ };
+
+ protected:
+ typedef regex_constants::syntax_option_type _FlagT;
+
+ enum _StateT // Scanner modes; the constructor starts in _S_state_normal.
+ {
+ _S_state_normal,
+ _S_state_in_brace,
+ _S_state_in_bracket,
+ };
+
+ protected:
+ _ScannerBase(_FlagT __flags) // Stores __flags and selects the escape / special-character tables for the grammar they name.
+ : _M_state(_S_state_normal),
+ _M_flags(__flags),
+ _M_escape_tbl(_M_is_ecma() // ECMAScript escape table, otherwise the awk one
+ ? _M_ecma_escape_tbl
+ : _M_awk_escape_tbl),
+ _M_spec_char(_M_is_ecma() // ECMAScript set, else basic/grep set, else extended set
+ ? _M_ecma_spec_char
+ : _M_is_basic()
+ ? _M_basic_spec_char
+ : _M_extended_spec_char),
+ _M_at_bracket_start(false)
+ { }
+
+ protected:
+ const char*
+ _M_find_escape(char __c) // Looks __c up in _M_escape_tbl; returns a pointer to the replacement char, or nullptr when __c is not listed.
+ {
+ auto __it = _M_escape_tbl;
+ for (; __it->first != '\0'; ++__it) // linear scan; each escape table ends with a {'\0', '\0'} sentinel
+ if (__it->first == __c)
+ return &__it->second;
+ return nullptr;
+ }
+
+ bool
+ _M_is_ecma() const
+ { return _M_flags & regex_constants::ECMAScript; }
+
+ bool
+ _M_is_basic() const
+ { return _M_flags & (regex_constants::basic | regex_constants::grep); }
+
+ bool
+ _M_is_extended() const
+ {
+ return _M_flags & (regex_constants::extended
+ | regex_constants::egrep
+ | regex_constants::awk);
+ }
+
+ bool
+ _M_is_grep() const
+ { return _M_flags & (regex_constants::grep | regex_constants::egrep); }
+
+ bool
+ _M_is_awk() const
+ { return _M_flags & regex_constants::awk; }
+
+ protected:
+ const std::pair<char, _TokenT> _M_token_tbl[9] = // special character -> token, terminated by a '\0' key
+ {
+ {'^', _S_token_line_begin},
+ {'$', _S_token_line_end},
+ {'.', _S_token_anychar},
+ {'*', _S_token_closure0},
+ {'+', _S_token_closure1},
+ {'?', _S_token_opt},
+ {'|', _S_token_or},
+ {'\n', _S_token_or}, // grep and egrep
+ {'\0', _S_token_or}, // sentinel entry: table scans stop when first == '\0'
+ };
+ const std::pair<char, char> _M_ecma_escape_tbl[8] = // escape letter -> character it stands for
+ {
+ {'0', '\0'},
+ {'b', '\b'},
+ {'f', '\f'},
+ {'n', '\n'},
+ {'r', '\r'},
+ {'t', '\t'},
+ {'v', '\v'},
+ {'\0', '\0'}, // sentinel
+ };
+ const std::pair<char, char> _M_awk_escape_tbl[11] = // escape letter -> character it stands for
+ {
+ {'"', '"'},
+ {'/', '/'},
+ {'\\', '\\'},
+ {'a', '\a'},
+ {'b', '\b'},
+ {'f', '\f'},
+ {'n', '\n'},
+ {'r', '\r'},
+ {'t', '\t'},
+ {'v', '\v'},
+ {'\0', '\0'}, // sentinel
+ };
+ const char* _M_ecma_spec_char = "^$\\.*+?()[]{}|";
+ const char* _M_basic_spec_char = ".[\\*^$";
+ const char* _M_extended_spec_char = ".[\\()*+?{|^$";
+
+ _StateT _M_state;
+ _FlagT _M_flags;
+ _TokenT _M_token; // not set by the constructor above
+ const std::pair<char, char>* _M_escape_tbl; // table in use; declared after _M_flags because its initializer calls _M_is_ecma(), which reads _M_flags
+ const char* _M_spec_char; // special-character string in use; same ordering requirement as _M_escape_tbl
+ bool _M_at_bracket_start;
+ };
+
/**
* @brief struct _Scanner. Scans an input range for regex tokens.
*
@@ -49,51 +197,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
* constructor: different regular expression grammars will interpret
* the same input pattern in syntactically different ways.
*/
- template<typename _FwdIter>
+ template<typename _CharT>
class _Scanner
+ : public _ScannerBase
{
public:
- typedef typename std::iterator_traits<_FwdIter>::value_type _CharT;
+ typedef const _CharT* _IterT;
typedef std::basic_string<_CharT> _StringT;
typedef regex_constants::syntax_option_type _FlagT;
typedef const std::ctype<_CharT> _CtypeT;
- /// Token types returned from the scanner.
- enum _TokenT
- {
- _S_token_anychar,
- _S_token_ord_char,
- _S_token_oct_num,
- _S_token_hex_num,
- _S_token_backref,
- _S_token_subexpr_begin,
- _S_token_subexpr_no_group_begin,
- _S_token_subexpr_lookahead_begin, // neg if _M_value[0] == 'n'
- _S_token_subexpr_end,
- _S_token_bracket_begin,
- _S_token_bracket_neg_begin,
- _S_token_bracket_end,
- _S_token_interval_begin,
- _S_token_interval_end,
- _S_token_quoted_class,
- _S_token_char_class_name,
- _S_token_collsymbol,
- _S_token_equiv_class_name,
- _S_token_opt,
- _S_token_or,
- _S_token_closure0,
- _S_token_closure1,
- _S_token_ungreedy,
- _S_token_line_begin,
- _S_token_line_end,
- _S_token_word_bound, // neg if _M_value[0] == 'n'
- _S_token_comma,
- _S_token_dup_count,
- _S_token_eof,
- _S_token_unknown
- };
-
- _Scanner(_FwdIter __begin, _FwdIter __end,
+ _Scanner(_IterT __begin, _IterT __end,
_FlagT __flags, std::locale __loc);
void
@@ -113,13 +227,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
#endif
private:
- enum _StateT
- {
- _S_state_normal,
- _S_state_in_brace,
- _S_state_in_bracket,
- };
-
void
_M_scan_normal();
@@ -141,49 +248,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
void
_M_eat_class(char);
- constexpr bool
- _M_is_ecma()
- { return _M_flags & regex_constants::ECMAScript; }
-
- constexpr bool
- _M_is_basic()
- { return _M_flags & (regex_constants::basic | regex_constants::grep); }
-
- constexpr bool
- _M_is_extended()
- {
- return _M_flags & (regex_constants::extended
- | regex_constants::egrep
- | regex_constants::awk);
- }
-
- constexpr bool
- _M_is_grep()
- { return _M_flags & (regex_constants::grep | regex_constants::egrep); }
-
- constexpr bool
- _M_is_awk()
- { return _M_flags & regex_constants::awk; }
-
- _StateT _M_state;
- _FwdIter _M_current;
- _FwdIter _M_end;
- _FlagT _M_flags;
+ _IterT _M_current;
+ _IterT _M_end;
_CtypeT& _M_ctype;
- _TokenT _M_token;
_StringT _M_value;
- bool _M_at_bracket_start;
- public:
- // FIXME: make them static when this file is stable.
- const std::map<char, _TokenT> _M_token_map;
- const std::map<char, char> _M_ecma_escape_map;
- const std::map<char, char> _M_awk_escape_map;
- const std::set<char> _M_ecma_spec_char;
- const std::set<char> _M_basic_spec_char;
- const std::set<char> _M_extended_spec_char;
-
- const std::map<char, char>& _M_escape_map;
- const std::set<char>& _M_spec_char;
void (_Scanner::* _M_eat_escape)();
};
diff --git a/libstdc++-v3/include/bits/regex_scanner.tcc b/libstdc++-v3/include/bits/regex_scanner.tcc
index 34d78ec..d954d07 100644
--- a/libstdc++-v3/include/bits/regex_scanner.tcc
+++ b/libstdc++-v3/include/bits/regex_scanner.tcc
@@ -52,106 +52,22 @@ namespace __detail
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION
- template<typename _FwdIter>
- _Scanner<_FwdIter>::
- _Scanner(_FwdIter __begin, _FwdIter __end,
+ template<typename _CharT>
+ _Scanner<_CharT>::
+ _Scanner(typename _Scanner::_IterT __begin, // _IterT is const _CharT*, so the pattern is given as a pointer range
+ typename _Scanner::_IterT __end,
_FlagT __flags, std::locale __loc)
- : _M_state(_S_state_normal), _M_current(__begin), _M_end(__end),
- _M_flags(__flags),
+ : _ScannerBase(__flags), // the base stores __flags and picks the escape / special-character tables
+ _M_current(__begin), _M_end(__end),
_M_ctype(std::use_facet<_CtypeT>(__loc)),
- _M_at_bracket_start(false),
- _M_token_map
- {
- {'^', _S_token_line_begin},
- {'$', _S_token_line_end},
- {'.', _S_token_anychar},
- {'*', _S_token_closure0},
- {'+', _S_token_closure1},
- {'?', _S_token_opt},
- {'|', _S_token_or},
- // grep and egrep
- {'\n', _S_token_or},
- },
- _M_ecma_escape_map
- {
- {'0', '\0'},
- {'b', '\b'},
- {'f', '\f'},
- {'n', '\n'},
- {'r', '\r'},
- {'t', '\t'},
- {'v', '\v'},
- },
- _M_awk_escape_map
- {
- {'"', '"'},
- {'/', '/'},
- {'\\', '\\'},
- {'a', '\a'},
- {'b', '\b'},
- {'f', '\f'},
- {'n', '\n'},
- {'r', '\r'},
- {'t', '\t'},
- {'v', '\v'},
- },
- _M_ecma_spec_char
- {
- '^',
- '$',
- '\\',
- '.',
- '*',
- '+',
- '?',
- '(',
- ')',
- '[',
- ']',
- '{',
- '}',
- '|',
- },
- _M_basic_spec_char
- {
- '.',
- '[',
- '\\',
- '*',
- '^',
- '$',
- },
- _M_extended_spec_char
- {
- '.',
- '[',
- '\\',
- '(',
- ')',
- '*',
- '+',
- '?',
- '{',
- '|',
- '^',
- '$',
- },
- _M_escape_map(_M_is_ecma()
- ? _M_ecma_escape_map
- : _M_awk_escape_map),
- _M_spec_char(_M_is_ecma()
- ? _M_ecma_spec_char
- : _M_is_basic()
- ? _M_basic_spec_char
- : _M_extended_spec_char),
_M_eat_escape(_M_is_ecma() // escape handler: the ECMAScript one for ECMAScript flags, the POSIX one for every other grammar
? &_Scanner::_M_eat_escape_ecma
: &_Scanner::_M_eat_escape_posix)
{ _M_advance(); } // NOTE(review): presumably loads the first token -- _M_advance() is only partly visible here
- template<typename _FwdIter>
+ template<typename _CharT>
void
- _Scanner<_FwdIter>::
+ _Scanner<_CharT>::
_M_advance()
{
if (_M_current == _M_end)
@@ -173,12 +89,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// Differences between styles:
// 1) "\(", "\)", "\{" in basic. It's not escaping.
// 2) "(?:", "(?=", "(?!" in ECMAScript.
- template<typename _FwdIter>
+ template<typename _CharT>
void
- _Scanner<_FwdIter>::
+ _Scanner<_CharT>::
_M_scan_normal()
{
auto __c = *_M_current++;
+ const char* __pos;
if (__c == '\\')
{
@@ -244,11 +161,23 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_state = _S_state_in_brace;
_M_token = _S_token_interval_begin;
}
- else if ((_M_spec_char.count(_M_ctype.narrow(__c, '\0'))
+ else if (((__pos = std::strchr(_M_spec_char, _M_ctype.narrow(__c, '\0')))
+ != nullptr
+ && *__pos != '\0'
&& __c != ']'
&& __c != '}')
|| (_M_is_grep() && __c == '\n'))
- _M_token = _M_token_map.at(__c);
+ {
+ auto __it = _M_token_tbl;
+ auto __narrowc = _M_ctype.narrow(__c, '\0');
+ for (; __it->first != '\0'; ++__it)
+ if (__it->first == __narrowc)
+ {
+ _M_token = __it->second;
+ return;
+ }
+ _GLIBCXX_DEBUG_ASSERT(false);
+ }
else
{
_M_token = _S_token_ord_char;
@@ -259,9 +188,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// Differences between styles:
// 1) different semantics of "[]" and "[^]".
// 2) Escaping in bracket expr.
- template<typename _FwdIter>
+ template<typename _CharT>
void
- _Scanner<_FwdIter>::
+ _Scanner<_CharT>::
_M_scan_in_bracket()
{
if (_M_current == _M_end)
@@ -316,9 +245,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// Differences between styles:
// 1) "\}" in basic style.
- template<typename _FwdIter>
+ template<typename _CharT>
void
- _Scanner<_FwdIter>::
+ _Scanner<_CharT>::
_M_scan_in_brace()
{
if (_M_current == _M_end)
@@ -357,21 +286,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__throw_regex_error(regex_constants::error_badbrace);
}
- template<typename _FwdIter>
+ template<typename _CharT>
void
- _Scanner<_FwdIter>::
+ _Scanner<_CharT>::
_M_eat_escape_ecma()
{
if (_M_current == _M_end)
__throw_regex_error(regex_constants::error_escape);
auto __c = *_M_current++;
+ auto __pos = _M_find_escape(_M_ctype.narrow(__c, '\0'));
- if (_M_escape_map.count(_M_ctype.narrow(__c, '\0'))
- && (__c != 'b' || _M_state == _S_state_in_bracket))
+ if (__pos != nullptr && (__c != 'b' || _M_state == _S_state_in_bracket))
{
_M_token = _S_token_ord_char;
- _M_value.assign(1, _M_escape_map.at(__c));
+ _M_value.assign(1, *__pos);
}
else if (__c == 'b')
{
@@ -431,17 +360,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// Differences between styles:
// 1) Extended doesn't support backref, but basic does.
- template<typename _FwdIter>
+ template<typename _CharT>
void
- _Scanner<_FwdIter>::
+ _Scanner<_CharT>::
_M_eat_escape_posix()
{
if (_M_current == _M_end)
__throw_regex_error(regex_constants::error_escape);
auto __c = *_M_current;
+ auto __pos = std::strchr(_M_spec_char, _M_ctype.narrow(__c, '\0'));
- if (_M_spec_char.count(_M_ctype.narrow(__c, '\0')))
+ if (__pos != nullptr && *__pos != '\0')
{
_M_token = _S_token_ord_char;
_M_value.assign(1, __c);
@@ -469,17 +399,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
++_M_current;
}
- template<typename _FwdIter>
+ template<typename _CharT>
void
- _Scanner<_FwdIter>::
+ _Scanner<_CharT>::
_M_eat_escape_awk()
{
auto __c = *_M_current++;
+ auto __pos = _M_find_escape(_M_ctype.narrow(__c, '\0'));
- if (_M_escape_map.count(_M_ctype.narrow(__c, '\0')))
+ if (__pos != nullptr)
{
_M_token = _S_token_ord_char;
- _M_value.assign(1, _M_escape_map.at(__c));
+ _M_value.assign(1, *__pos);
}
// \ddd for oct representation
else if (_M_ctype.is(_CtypeT::digit, __c)
@@ -505,9 +436,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// Eats a character class or throws an exception.
// __ch could be ':', '.' or '=', _M_current is the char after ']' when
// returning.
- template<typename _FwdIter>
+ template<typename _CharT>
void
- _Scanner<_FwdIter>::
+ _Scanner<_CharT>::
_M_eat_class(char __ch)
{
for (_M_value.clear(); _M_current != _M_end && *_M_current != __ch;)
@@ -525,9 +456,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
}
#ifdef _GLIBCXX_DEBUG
- template<typename _FwdIter>
+ template<typename _CharT>
std::ostream&
- _Scanner<_FwdIter>::
+ _Scanner<_CharT>::
_M_print(std::ostream& ostr)
{
switch (_M_token)
diff --git a/libstdc++-v3/include/std/regex b/libstdc++-v3/include/std/regex
index 9395f50..9161f48 100644
--- a/libstdc++-v3/include/std/regex
+++ b/libstdc++-v3/include/std/regex
@@ -44,15 +44,13 @@
#include <iterator>
#include <locale>
#include <memory>
-#include <map>
-#include <queue>
-#include <set>
#include <sstream>
#include <stack>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
+#include <cstring>
#include <bits/regex_constants.h>
#include <bits/regex_error.h>