diff options
author | Tim Shen <timshen91@gmail.com> | 2013-08-03 14:32:54 +0000 |
---|---|---|
committer | Tim Shen <timshen@gcc.gnu.org> | 2013-08-03 14:32:54 +0000 |
commit | 399eeef9786cb661aa86981ee9e7a9f3dec373d6 (patch) | |
tree | cd6382282eec0df5837cd32d833217284b86cf42 | |
parent | cdaa808aedfc4b5363197ac119ef976a5d66e5a4 (diff) | |
download | gcc-399eeef9786cb661aa86981ee9e7a9f3dec373d6.zip gcc-399eeef9786cb661aa86981ee9e7a9f3dec373d6.tar.gz gcc-399eeef9786cb661aa86981ee9e7a9f3dec373d6.tar.bz2 |
Implement bracket expression.
2013-08-03 Tim Shen <timshen91@gmail.com>
Implement bracket expression.
* include/bits/regex.h: Remove constexpr from "|=", etc.
* include/bits/regex_compiler.h: Parse bracket expression.
* include/bits/regex_nfa.h: _Comparator and _BracketMatcher(old
_RangeMatcher).
* include/bits/regex_nfa.tcc: Implement them.
* testsuite/28_regex/algorithms/regex_match/extended/53622.cc:
from regex_search to regex_match.
* testsuite/28_regex/algorithms/regex_match/extended/
cstring_bracket_01.cc: New.
From-SVN: r201465
-rw-r--r-- | libstdc++-v3/ChangeLog | 13 | ||||
-rw-r--r-- | libstdc++-v3/include/bits/regex.h | 9 | ||||
-rw-r--r-- | libstdc++-v3/include/bits/regex_compiler.h | 273 | ||||
-rw-r--r-- | libstdc++-v3/include/bits/regex_nfa.h | 100 | ||||
-rw-r--r-- | libstdc++-v3/include/bits/regex_nfa.tcc | 58 | ||||
-rw-r--r-- | libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/53622.cc | 4 | ||||
-rw-r--r-- | libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/cstring_bracket_01.cc | 66 |
7 files changed, 331 insertions, 192 deletions
diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog index 8c82095..a1a690d 100644 --- a/libstdc++-v3/ChangeLog +++ b/libstdc++-v3/ChangeLog @@ -1,3 +1,16 @@ +2013-08-03 Tim Shen <timshen91@gmail.com> + + Implement bracket expression. + * include/bits/regex.h: Remove constexpr from "|=", etc. + * include/bits/regex_compiler.h: Parse bracket expression. + * include/bits/regex_nfa.h: _Comparator and _BracketMatcher(old + _RangeMatcher). + * include/bits/regex_nfa.tcc: Implement them. + * testsuite/28_regex/algorithms/regex_match/extended/53622.cc: + from regex_search to regex_match. + * testsuite/28_regex/algorithms/regex_match/extended/ + cstring_bracket_01.cc: New. + 2013-08-02 Paolo Carlini <paolo.carlini@oracle.com> * include/debug/functions.h (__foreign_iterator_aux4): diff --git a/libstdc++-v3/include/bits/regex.h b/libstdc++-v3/include/bits/regex.h index c1f6533..fd8f134 100644 --- a/libstdc++-v3/include/bits/regex.h +++ b/libstdc++-v3/include/bits/regex.h @@ -95,15 +95,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION operator~() const { return _RegexMask(~_M_base, ~_M_extended); } - constexpr _RegexMask& + _RegexMask& operator&=(_RegexMask __other) { return *this = (*this) & __other; } - constexpr _RegexMask& + _RegexMask& operator|=(_RegexMask __other) { return *this = (*this) | __other; } - constexpr _RegexMask& + _RegexMask& operator^=(_RegexMask __other) { return *this = (*this) ^ __other; } @@ -228,7 +228,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION __fctyp.tolower(&*__v.begin(), &*__v.end()); return this->transform(&*__v.begin(), &*__v.end()); } - __catch (...) + __catch (std::bad_cast) { } return string_type(); @@ -519,7 +519,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION }; std::string __s(__last - __first, '?'); - string_type a(__first, __last); __fctyp.narrow(__first, __last, '?', &*__s.begin()); for (unsigned int __i = 0; *__collatenames[__i]; __i++) diff --git a/libstdc++-v3/include/bits/regex_compiler.h b/libstdc++-v3/include/bits/regex_compiler.h index badea8c..2cc9da1 100644 --- a/libstdc++-v3/include/bits/regex_compiler.h +++ b/libstdc++-v3/include/bits/regex_compiler.h @@ -44,9 +44,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { typedef unsigned int _StateT; - static constexpr _StateT _S_state_at_start = 1 << 0; - static constexpr _StateT _S_state_in_brace = 1 << 2; - static constexpr _StateT _S_state_in_bracket = 1 << 3; + static constexpr _StateT _S_state_in_brace = 1 << 0; + static constexpr _StateT _S_state_in_bracket = 1 << 1; virtual ~_Scanner_base() { }; }; @@ -77,8 +76,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _S_token_anychar, _S_token_backref, _S_token_bracket_begin, + _S_token_bracket_inverse_begin, _S_token_bracket_end, - _S_token_inverse_class, _S_token_char_class_name, _S_token_closure0, _S_token_closure1, @@ -97,7 +96,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _S_token_opt, _S_token_or, _S_token_ord_char, - _S_token_quoted_char, _S_token_subexpr_begin, _S_token_subexpr_end, _S_token_word_begin, @@ -108,7 +106,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _Scanner(_IteratorT __begin, _IteratorT __end, _FlagT __flags, std::locale __loc) : _M_current(__begin) , _M_end(__end) , _M_flags(__flags), - _M_ctype(std::use_facet<_CtypeT>(__loc)), _M_state(_S_state_at_start) + _M_ctype(std::use_facet<_CtypeT>(__loc)), _M_state(0) { _M_advance(); } void @@ -219,9 +217,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION } else if (__c == _M_ctype.widen('[')) { - _M_curToken = _S_token_bracket_begin; - _M_state |= (_S_state_in_bracket | _S_state_at_start); - ++_M_current; + if (*++_M_current == _M_ctype.widen('^')) + { + _M_curToken = _S_token_bracket_inverse_begin; + ++_M_current; + } + else + _M_curToken = _S_token_bracket_begin; + _M_state |= _S_state_in_bracket; return; } else if (__c == _M_ctype.widen('\\')) @@ -304,14 +307,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _Scanner<_InputIterator>:: _M_scan_in_bracket() { - if (_M_state & _S_state_at_start && *_M_current == _M_ctype.widen('^')) - { - _M_curToken = _S_token_inverse_class; - _M_state &= ~_S_state_at_start; - ++_M_current; - return; - } - else if (*_M_current == _M_ctype.widen('[')) + if (*_M_current == _M_ctype.widen('[')) { ++_M_current; if (_M_current == _M_end) @@ -347,21 +343,22 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION } else if (*_M_current == _M_ctype.widen(']')) { - if (!(_M_flags & regex_constants::ECMAScript) - || !(_M_state & _S_state_at_start)) - { - // special case: only if _not_ chr first after - // '[' or '[^' and if not ECMAscript - _M_curToken = _S_token_bracket_end; - ++_M_current; - return; - } + _M_curToken = _S_token_bracket_end; + _M_state &= ~_S_state_in_bracket; + ++_M_current; + return; } + else if (*_M_current == _M_ctype.widen('\\')) + { + _M_eat_escape(); + return; + } _M_curToken = _S_token_collelem_single; _M_curValue.assign(1, *_M_current); ++_M_current; } + // TODO implement it. template<typename _InputIterator> void _Scanner<_InputIterator>:: @@ -463,11 +460,28 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_curToken = _S_token_backref; _M_curValue.assign(1, __c); } + else if (_M_state & _S_state_in_bracket) + { + if (__c == _M_ctype.widen('-') + || __c == _M_ctype.widen('[') + || __c == _M_ctype.widen(']')) + { + _M_curToken = _S_token_ord_char; + _M_curValue.assign(1, __c); + } + else if ((_M_flags & regex_constants::ECMAScript) + && __c == _M_ctype.widen('b')) + { + _M_curToken = _S_token_ord_char; + _M_curValue.assign(1, _M_ctype.widen(' ')); + } + else + __throw_regex_error(regex_constants::error_escape); + } else __throw_regex_error(regex_constants::error_escape); } - // Eats a character class or throwns an exception. // current point to ':' delimiter on entry, char after ']' on return template<typename _InputIterator> @@ -549,6 +563,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION case _S_token_bracket_begin: ostr << "bracket-begin\n"; break; + case _S_token_bracket_inverse_begin: + ostr << "bracket-inverse-begin\n"; + break; case _S_token_bracket_end: ostr << "bracket-end\n"; break; @@ -606,9 +623,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION case _S_token_ord_char: ostr << "ordinary character: \"" << _M_value() << "\"\n"; break; - case _S_token_quoted_char: - ostr << "quoted char\n"; - break; case _S_token_subexpr_begin: ostr << "subexpr begin\n"; break; @@ -624,6 +638,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION case _S_token_unknown: ostr << "-- unknown token --\n"; break; + default: + _GLIBCXX_DEBUG_ASSERT(false); } return ostr; } @@ -650,7 +666,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION typedef _Scanner<_InIter> _ScannerT; typedef typename _ScannerT::_TokenT _TokenT; typedef std::stack<_StateSeq, std::vector<_StateSeq> > _StackT; - typedef _RangeMatcher<_InIter, _TraitsT> _RMatcherT; + typedef _BracketMatcher<_InIter, _TraitsT> _BMatcherT; // accepts a specific token or returns false. bool @@ -659,7 +675,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION void _M_disjunction(); - bool + void _M_alternative(); bool @@ -668,7 +684,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION bool _M_assertion(); - bool + void _M_quantifier(); bool @@ -678,31 +694,28 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_bracket_expression(); bool - _M_bracket_list(_RMatcherT& __matcher); + _M_bracket_list(_BMatcherT& __matcher); bool - _M_follow_list(_RMatcherT& __matcher); + _M_follow_list(_BMatcherT& __matcher); - bool - _M_follow_list2(_RMatcherT& __matcher); - - bool - _M_expression_term(_RMatcherT& __matcher); + void + _M_expression_term(_BMatcherT& __matcher); bool - _M_range_expression(_RMatcherT& __matcher); + _M_range_expression(_BMatcherT& __matcher); bool - _M_start_range(_RMatcherT& __matcher); + _M_start_range(_BMatcherT& __matcher); bool - _M_collating_symbol(_RMatcherT& __matcher); + _M_collating_symbol(_BMatcherT& __matcher); bool - _M_equivalence_class(_RMatcherT& __matcher); + _M_equivalence_class(_BMatcherT& __matcher); bool - _M_character_class(_RMatcherT& __matcher); + _M_character_class(_BMatcherT& __matcher); int _M_cur_int_value(int __radix); @@ -712,6 +725,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _StringT _M_cur_value; _Nfa _M_state_store; _StackT _M_stack; + _FlagT _M_flags; }; template<typename _InIter, typename _TraitsT> @@ -719,7 +733,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _Compiler(const _InIter& __b, const _InIter& __e, _TraitsT& __traits, _Compiler<_InIter, _TraitsT>::_FlagT __flags) : _M_traits(__traits), _M_scanner(__b, __e, __flags, _M_traits.getloc()), - _M_state_store(__flags) + _M_state_store(__flags), _M_flags(__flags) { typedef _StartTagger<_InIter, _TraitsT> _Start; typedef _EndTagger<_InIter, _TraitsT> _End; @@ -743,8 +757,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { if (token == _M_scanner._M_token()) { - _M_cur_value = _M_scanner._M_value(); - _M_scanner._M_advance(); + _M_cur_value = _M_scanner._M_value(); + _M_scanner._M_advance(); return true; } return false; @@ -766,7 +780,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION } template<typename _InIter, typename _TraitsT> - bool + void _Compiler<_InIter, _TraitsT>:: _M_alternative() { @@ -780,9 +794,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_stack.pop(); } _M_stack.push(__re); - return true; } - return false; } template<typename _InIter, typename _TraitsT> @@ -829,7 +841,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION } template<typename _InIter, typename _TraitsT> - bool + void _Compiler<_InIter, _TraitsT>:: _M_quantifier() { @@ -841,7 +853,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION __r._M_append(__r._M_front()); _M_stack.pop(); _M_stack.push(__r); - return true; + return; } if (_M_match_token(_ScannerT::_S_token_closure1)) { @@ -852,7 +864,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_insert_alt(_S_invalid_state_id, _M_stack.top()._M_front())); _M_stack.top()._M_append(__r); - return true; + return; } if (_M_match_token(_ScannerT::_S_token_opt)) { @@ -861,7 +873,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _StateSeq __r(_M_stack.top(), -1); _M_stack.pop(); _M_stack.push(__r); - return true; + return; } if (_M_match_token(_ScannerT::_S_token_interval_begin)) { @@ -897,9 +909,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION } if (!_M_match_token(_ScannerT::_S_token_interval_end)) __throw_regex_error(regex_constants::error_brace); - return true; + return; } - return false; } template<typename _InIter, typename _TraitsT> @@ -922,15 +933,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { _M_stack.push(_StateSeq(_M_state_store, _M_state_store._M_insert_matcher - (_CMatcher(_M_cur_value[0], _M_traits)))); - return true; - } - if (_M_match_token(_ScannerT::_S_token_quoted_char)) - { - // note that in the ECMA grammar, this case covers backrefs. - _M_stack.push(_StateSeq(_M_state_store, - _M_state_store._M_insert_matcher - (_CMatcher(_M_cur_value[0], _M_traits)))); + (_CMatcher(_M_cur_value[0], _M_flags, _M_traits)))); return true; } if (_M_match_token(_ScannerT::_S_token_backref)) @@ -966,124 +969,74 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _Compiler<_InIter, _TraitsT>:: _M_bracket_expression() { - if (_M_match_token(_ScannerT::_S_token_bracket_begin)) - { - _RMatcherT __matcher(_M_match_token(_ScannerT::_S_token_line_begin), - _M_traits); - if (!_M_bracket_list(__matcher) - || !_M_match_token(_ScannerT::_S_token_bracket_end)) - __throw_regex_error(regex_constants::error_brack); - _M_stack.push(_StateSeq(_M_state_store, - _M_state_store._M_insert_matcher(__matcher))); - return true; - } - return false; - } - - // If the dash is the last character in the bracket expression, it is not - // special. - template<typename _InIter, typename _TraitsT> - bool - _Compiler<_InIter, _TraitsT>:: - _M_bracket_list(_RMatcherT& __matcher) - { - if (_M_follow_list(__matcher)) - { - if (_M_match_token(_ScannerT::_S_token_dash)) - __matcher._M_add_char(_M_cur_value[0]); - return true; - } - return false; - } - - template<typename _InIter, typename _TraitsT> - bool - _Compiler<_InIter, _TraitsT>:: - _M_follow_list(_RMatcherT& __matcher) - { return _M_expression_term(__matcher) && _M_follow_list2(__matcher); } - - template<typename _InIter, typename _TraitsT> - bool - _Compiler<_InIter, _TraitsT>:: - _M_follow_list2(_RMatcherT& __matcher) - { - if (_M_expression_term(__matcher)) - return _M_follow_list2(__matcher); + bool __inverse = + _M_match_token(_ScannerT::_S_token_bracket_inverse_begin); + if (!(__inverse || _M_match_token(_ScannerT::_S_token_bracket_begin))) + return false; + _BMatcherT __matcher( __inverse, _M_flags, _M_traits); + // special case: only if _not_ chr first after + // '[' or '[^' or if ECMAscript + if (!_M_bracket_list(__matcher) // list is empty + && !(_M_flags & regex_constants::ECMAScript)) + __throw_regex_error(regex_constants::error_brack); + _M_stack.push(_StateSeq(_M_state_store, + _M_state_store._M_insert_matcher(__matcher))); return true; } template<typename _InIter, typename _TraitsT> - bool + bool // list is non-empty _Compiler<_InIter, _TraitsT>:: - _M_expression_term(_RMatcherT& __matcher) + _M_bracket_list(_BMatcherT& __matcher) { - return (_M_collating_symbol(__matcher) - || _M_character_class(__matcher) - || _M_equivalence_class(__matcher) - || (_M_start_range(__matcher) - && _M_range_expression(__matcher))); - } - - template<typename _InIter, typename _TraitsT> - bool - _Compiler<_InIter, _TraitsT>:: - _M_range_expression(_RMatcherT& __matcher) - { - if (!_M_collating_symbol(__matcher)) - if (!_M_match_token(_ScannerT::_S_token_dash)) - __throw_regex_error(regex_constants::error_range); - __matcher._M_make_range(); + if (_M_match_token(_ScannerT::_S_token_bracket_end)) + return false; + _M_expression_term(__matcher); + _M_bracket_list(__matcher); return true; } template<typename _InIter, typename _TraitsT> - bool - _Compiler<_InIter, _TraitsT>:: - _M_start_range(_RMatcherT& __matcher) - { return _M_match_token(_ScannerT::_S_token_dash); } - - template<typename _InIter, typename _TraitsT> - bool + void _Compiler<_InIter, _TraitsT>:: - _M_collating_symbol(_RMatcherT& __matcher) + _M_expression_term(_BMatcherT& __matcher) { - if (_M_match_token(_ScannerT::_S_token_collelem_single)) - { - __matcher._M_add_char(_M_cur_value[0]); - return true; - } if (_M_match_token(_ScannerT::_S_token_collsymbol)) { __matcher._M_add_collating_element(_M_cur_value); - return true; + return; } - return false; - } - - template<typename _InIter, typename _TraitsT> - bool - _Compiler<_InIter, _TraitsT>:: - _M_equivalence_class(_RMatcherT& __matcher) - { if (_M_match_token(_ScannerT::_S_token_equiv_class_name)) { __matcher._M_add_equivalence_class(_M_cur_value); - return true; + return; } - return false; - } - - template<typename _InIter, typename _TraitsT> - bool - _Compiler<_InIter, _TraitsT>:: - _M_character_class(_RMatcherT& __matcher) - { if (_M_match_token(_ScannerT::_S_token_char_class_name)) { __matcher._M_add_character_class(_M_cur_value); - return true; + return; } - return false; + if (_M_match_token(_ScannerT::_S_token_collelem_single)) // [a + { + auto __ch = _M_cur_value[0]; + if (_M_match_token(_ScannerT::_S_token_dash)) // [a- + { + // If the dash is the last character in the bracket expression, + // it is not special. + if (_M_scanner._M_token() == _ScannerT::_S_token_bracket_end) + __matcher._M_add_char(_M_cur_value[0]); // [a-] <=> [a\-] + else // [a-z] + { + if (!_M_match_token(_ScannerT::_S_token_collelem_single)) + __throw_regex_error(regex_constants::error_range); + __matcher._M_make_range(__ch, _M_cur_value[0]); + } + } + else // [a] + __matcher._M_add_char(__ch); + return; + } + __throw_regex_error(regex_constants::error_brack); } template<typename _InIter, typename _TraitsT> diff --git a/libstdc++-v3/include/bits/regex_nfa.h b/libstdc++-v3/include/bits/regex_nfa.h index 1bae1b7..b4ac452 100644 --- a/libstdc++-v3/include/bits/regex_nfa.h +++ b/libstdc++-v3/include/bits/regex_nfa.h @@ -129,6 +129,29 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION int _M_index; }; + // TODO For now we use an all-in-one comparator. In the future there may be + // optimizations based on regex_traits::translate and regex_transform. + template<typename _InIterT, typename _TraitsT> + struct _Comparator + { + typedef regex_constants::syntax_option_type _FlagT; + typedef typename _TraitsT::char_type _CharT; + typedef std::basic_string<_CharT> _StringT; + + _Comparator(_FlagT __flags, const _TraitsT& __traits) + : _M_flags(__flags), _M_traits(__traits) + { } + + bool + _M_equ(_CharT __a, _CharT __b) const; + + bool + _M_le(_CharT __a, _CharT __b) const; + + _FlagT _M_flags; + _TraitsT _M_traits; + }; + /// Indicates if current state matches cursor current. typedef std::function<bool (const _PatternCursor&)> _Matcher; @@ -140,12 +163,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION /// Matches a single character template<typename _InIterT, typename _TraitsT> struct _CharMatcher + : public _Comparator<_InIterT, _TraitsT> { - typedef typename _TraitsT::char_type char_type; + typedef _Comparator<_InIterT, _TraitsT> _BaseT; + typedef typename _TraitsT::char_type _CharT; + typedef regex_constants::syntax_option_type _FlagT; explicit - _CharMatcher(char_type __c, const _TraitsT& __t = _TraitsT()) - : _M_traits(__t), _M_c(_M_traits.translate(__c)) + _CharMatcher(_CharT __c, _FlagT __flags, const _TraitsT& __t) + : _BaseT(__flags, __t), _M_c(__c) { } bool @@ -153,55 +179,79 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { typedef const _SpecializedCursor<_InIterT>& _CursorT; _CursorT __c = static_cast<_CursorT>(__pc); - return _M_traits.translate(__c._M_current()) == _M_c; + return this->_M_equ(__c._M_current(), _M_c); } - const _TraitsT& _M_traits; - char_type _M_c; + _CharT _M_c; }; /// Matches a character range (bracket expression) template<typename _InIterT, typename _TraitsT> - struct _RangeMatcher + struct _BracketMatcher + : public _Comparator<_InIterT, _TraitsT> { - typedef typename _TraitsT::char_type _CharT; - typedef std::basic_string<_CharT> _StringT; + typedef _Comparator<_InIterT, _TraitsT> _BaseT; + typedef typename _TraitsT::char_class_type _CharClassT; + typedef regex_constants::syntax_option_type _FlagT; + typedef typename _TraitsT::char_type _CharT; + typedef std::basic_string<_CharT> _StringT; explicit - _RangeMatcher(bool __is_non_matching, const _TraitsT& __t = _TraitsT()) - : _M_traits(__t), _M_is_non_matching(__is_non_matching) + _BracketMatcher(bool __is_non_matching, + _FlagT __flags, + const _TraitsT& __t) + : _BaseT(__flags, __t), _M_flags(__flags), _M_traits(__t), + _M_is_non_matching(__is_non_matching), _M_class_set(0) { } bool - operator()(const _PatternCursor& __pc) const - { - typedef const _SpecializedCursor<_InIterT>& _CursorT; - _CursorT __c = static_cast<_CursorT>(__pc); - return true; - } + operator()(const _PatternCursor& __pc) const; void _M_add_char(_CharT __c) - { } + { _M_char_set.push_back(__c); } void _M_add_collating_element(const _StringT& __s) - { } + { + auto __st = _M_traits.lookup_collatename(&*__s.begin(), &*__s.end()); + if (__st.empty()) + __throw_regex_error(regex_constants::error_collate); + // TODO: digraph + _M_char_set.push_back(__st[0]); + } void _M_add_equivalence_class(const _StringT& __s) - { } + { + _M_add_character_class( + _M_traits.transform_primary(&*__s.begin(), &*__s.end())); + } void _M_add_character_class(const _StringT& __s) - { } + { + auto __st = _M_traits.lookup_classname( + &*__s.begin(), &*__s.end(), (_M_flags & regex_constants::icase)); + if (__st == 0) + __throw_regex_error(regex_constants::error_ctype); + _M_class_set |= __st; + } void - _M_make_range() - { } + _M_make_range(_CharT __l, _CharT __r) + { + if (!this->_M_le(__l, __r)) + __throw_regex_error(regex_constants::error_range); + _M_range_set.push_back(make_pair(__l, __r)); + } - const _TraitsT& _M_traits; - bool _M_is_non_matching; + _FlagT _M_flags; + _TraitsT _M_traits; + bool _M_is_non_matching; + std::vector<_CharT> _M_char_set; + std::vector<pair<_CharT, _CharT>> _M_range_set; + _CharClassT _M_class_set; }; /// Identifies a state in the NFA. diff --git a/libstdc++-v3/include/bits/regex_nfa.tcc b/libstdc++-v3/include/bits/regex_nfa.tcc index e8277f8..12f6fe8 100644 --- a/libstdc++-v3/include/bits/regex_nfa.tcc +++ b/libstdc++-v3/include/bits/regex_nfa.tcc @@ -35,6 +35,64 @@ namespace __detail { _GLIBCXX_BEGIN_NAMESPACE_VERSION + template<typename _InIterT, typename _TraitsT> + bool _BracketMatcher<_InIterT, _TraitsT>:: + operator()(const _PatternCursor& __pc) const + { + typedef const _SpecializedCursor<_InIterT>& _CursorT; + _CursorT __c = static_cast<_CursorT>(__pc); + _CharT __ch = __c._M_current(); + bool __ret = false; + for (auto __c : _M_char_set) + if (this->_M_equ(__c, __ch)) + { + __ret = true; + break; + } + if (!__ret && _M_traits.isctype(__ch, _M_class_set)) + __ret = true; + else + { + for (auto& __it : _M_range_set) + if (this->_M_le(__it.first, __ch) && this->_M_le(__ch, __it.second)) + { + __ret = true; + break; + } + } + if (_M_is_non_matching) + __ret = !__ret; + return __ret; + } + + template<typename _InIterT, typename _TraitsT> + bool _Comparator<_InIterT, _TraitsT>:: + _M_equ(_CharT __a, _CharT __b) const + { + if (_M_flags & regex_constants::icase) + return _M_traits.translate_nocase(__a) + == _M_traits.translate_nocase(__b); + if (_M_flags & regex_constants::collate) + return _M_traits.translate(__a) == _M_traits.translate(__b); + return __a == __b; + } + + template<typename _InIterT, typename _TraitsT> + bool _Comparator<_InIterT, _TraitsT>:: + _M_le(_CharT __a, _CharT __b) const + { + _StringT __str1 = _StringT(1, + _M_flags & regex_constants::icase + ? _M_traits.translate_nocase(__a) + : _M_traits.translate(__a)); + _StringT __str2 = _StringT(1, + _M_flags & regex_constants::icase + ? _M_traits.translate_nocase(__b) + : _M_traits.translate(__b)); + return _M_traits.transform(__str1.begin(), __str1.end()) + <= _M_traits.transform(__str2.begin(), __str2.end()); + } + #ifdef _GLIBCXX_DEBUG inline std::ostream& _State:: _M_print(std::ostream& ostr) const diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/53622.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/53622.cc index 10e2ff4..aee1dbe 100644 --- a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/53622.cc +++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/53622.cc @@ -37,7 +37,7 @@ test01() std::string target("zxcv/onetwoabc"); std::smatch m; - VERIFY( std::regex_search(target, m, re) ); + VERIFY( std::regex_match(target, m, re) ); VERIFY( m.size() == 2 ); VERIFY( m[0].matched == true ); VERIFY( std::string(m[0].first, m[0].second) == "zxcv/onetwoabc" ); @@ -50,7 +50,7 @@ test01() std::string target("zxcv/onetwoabc"); std::smatch m; - VERIFY( std::regex_search(target, m, re) ); + VERIFY( std::regex_match(target, m, re) ); VERIFY( m.size() == 3 ); VERIFY( m[0].matched == true ); VERIFY( std::string(m[0].first, m[0].second) == "zxcv/onetwoabc" ); diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/cstring_bracket_01.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/cstring_bracket_01.cc new file mode 100644 index 0000000..3a4ff31 --- /dev/null +++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/cstring_bracket_01.cc @@ -0,0 +1,66 @@ +// { dg-options "-std=gnu++11" } + +// +// 2013-08-01 Tim Shen <timshen91@gmail.com> +// +// Copyright (C) 2013 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING3. If not see +// <http://www.gnu.org/licenses/>. + +// 28.11.2 regex_match +// Tests Extended bracket expression against a C-string. + +#include <regex> +#include <testsuite_hooks.h> + +void +test01() +{ + bool test __attribute__((unused)) = true; + + { + std::regex re("pre/[za-x]", std::regex::extended); + VERIFY( std::regex_match("pre/z", re) ); + VERIFY( std::regex_match("pre/a", re) ); + VERIFY( !std::regex_match("pre/y", re) ); + } + { + std::regex re("pre/[[:uPPer:]]", std::regex::extended); + VERIFY( std::regex_match("pre/Z", re) ); + VERIFY( !std::regex_match("pre/_", re) ); + VERIFY( !std::regex_match("pre/a", re) ); + VERIFY( !std::regex_match("pre/0", re) ); + } + { + std::regex re("pre/[[:lOWer:]]", std::regex::extended | std::regex::icase); + VERIFY( std::regex_match("pre/Z", re) ); + VERIFY( std::regex_match("pre/a", re) ); + } + { + std::regex re("pre/[[:w:][.tilde.]]", std::regex::extended); + VERIFY( std::regex_match("pre/~", re) ); + VERIFY( std::regex_match("pre/_", re) ); + VERIFY( std::regex_match("pre/a", re) ); + VERIFY( std::regex_match("pre/0", re) ); + } +} + +int +main() +{ + test01(); + return 0; +} |