diff options
author | Tim Shen <timshen@google.com> | 2016-12-01 03:03:55 +0000 |
---|---|---|
committer | Tim Shen <timshen@gcc.gnu.org> | 2016-12-01 03:03:55 +0000 |
commit | 974afa584b18a3713eb4ada1b5d657ca0f94f498 (patch) | |
tree | 99b6e42fe5cfb1174faef0662f47640649cba2a5 /libstdc++-v3 | |
parent | 0f09127808018c479214478d3f79e0ac40a3c55b (diff) | |
download | gcc-974afa584b18a3713eb4ada1b5d657ca0f94f498.zip gcc-974afa584b18a3713eb4ada1b5d657ca0f94f498.tar.gz gcc-974afa584b18a3713eb4ada1b5d657ca0f94f498.tar.bz2 |
re PR libstdc++/71500 (regex::icase only works on first character in a range)
PR libstdc++/71500
* include/bits/regex.h (basic_regex::basic_regex): Use ECMAScript
when the syntax is not specified.
* include/bits/regex_compiler.h (_RegexTranslator,
_RegexTranslatorBase): Partially support icase in ranges.
* include/bits/regex_compiler.tcc (_BracketMatcher::_M_apply):
Refactor _M_apply to make the control flow easier to follow, and
call _M_translator._M_match_range as added previously.
* testsuite/28_regex/traits/char/icase.cc: Add new tests.
* testsuite/28_regex/traits/char/user_defined.cc: Add new tests.
From-SVN: r243093
Diffstat (limited to 'libstdc++-v3')
-rw-r--r-- | libstdc++-v3/ChangeLog | 13 |
-rw-r--r-- | libstdc++-v3/include/bits/regex.h | 4 |
-rw-r--r-- | libstdc++-v3/include/bits/regex_compiler.h | 102 |
-rw-r--r-- | libstdc++-v3/include/bits/regex_compiler.tcc | 51 |
-rw-r--r-- | libstdc++-v3/testsuite/28_regex/traits/char/icase.cc | 74 |
-rw-r--r-- | libstdc++-v3/testsuite/28_regex/traits/char/user_defined.cc | 66 |
6 files changed, 261 insertions, 49 deletions
diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog index 6d3bfcc..cd4f5ae 100644 --- a/libstdc++-v3/ChangeLog +++ b/libstdc++-v3/ChangeLog @@ -1,3 +1,16 @@ +2016-11-30 Tim Shen <timshen@google.com> + + PR libstdc++/71500 + * include/bits/regex.h (basic_regex::basic_regex): Use ECMAScript + when the syntax is not specified. + * include/bits/regex_compiler.h (_RegexTranslator, + _RegexTranslatorBase): Partially support icase in ranges. + * include/bits/regex_compiler.tcc (_BracketMatcher::_M_apply): + Refactor _M_apply to make the control flow easier to follow, and + call _M_translator._M_match_range as added previously. + * testsuite/28_regex/traits/char/icase.cc: Add new tests. + * testsuite/28_regex/traits/char/user_defined.cc: Add new tests. + 2016-11-30 Ville Voutilainen <ville.voutilainen@gmail.com> Fix testsuite failures caused by the patch implementing LWG 2534. diff --git a/libstdc++-v3/include/bits/regex.h b/libstdc++-v3/include/bits/regex.h index aadf312..224d3db 100644 --- a/libstdc++-v3/include/bits/regex.h +++ b/libstdc++-v3/include/bits/regex.h @@ -762,7 +762,9 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 template<typename _FwdIter> basic_regex(_FwdIter __first, _FwdIter __last, locale_type __loc, flag_type __f) - : _M_flags(__f), _M_loc(std::move(__loc)), + : _M_flags((__f & (ECMAScript | basic | extended | awk | grep | egrep)) + ? 
__f : (__f | ECMAScript)), + _M_loc(std::move(__loc)), _M_automaton(__detail::__compile_nfa<_FwdIter, _Rx_traits>( std::move(__first), std::move(__last), _M_loc, _M_flags)) { } diff --git a/libstdc++-v3/include/bits/regex_compiler.h b/libstdc++-v3/include/bits/regex_compiler.h index 410d61b..318cb3b 100644 --- a/libstdc++-v3/include/bits/regex_compiler.h +++ b/libstdc++-v3/include/bits/regex_compiler.h @@ -30,6 +30,15 @@ namespace std _GLIBCXX_VISIBILITY(default) { +_GLIBCXX_BEGIN_NAMESPACE_VERSION +_GLIBCXX_BEGIN_NAMESPACE_CXX11 + + template<typename> + class regex_traits; + +_GLIBCXX_END_NAMESPACE_CXX11 +_GLIBCXX_END_NAMESPACE_VERSION + namespace __detail { _GLIBCXX_BEGIN_NAMESPACE_VERSION @@ -207,17 +216,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION // [28.13.14] template<typename _TraitsT, bool __icase, bool __collate> - class _RegexTranslator + class _RegexTranslatorBase { public: typedef typename _TraitsT::char_type _CharT; typedef typename _TraitsT::string_type _StringT; - typedef typename std::conditional<__collate, - _StringT, - _CharT>::type _StrTransT; + typedef _StringT _StrTransT; explicit - _RegexTranslator(const _TraitsT& __traits) + _RegexTranslatorBase(const _TraitsT& __traits) : _M_traits(__traits) { } @@ -235,23 +242,86 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _StrTransT _M_transform(_CharT __ch) const { - return _M_transform_impl(__ch, typename integral_constant<bool, - __collate>::type()); + _StrTransT __str(1, __ch); + return _M_traits.transform(__str.begin(), __str.end()); } - private: + // See LWG 523. It's not efficiently implementable when _TraitsT is not + // std::regex_traits<>, and __collate is true. See specializations for + // implementations of other cases. 
+ bool + _M_match_range(const _StrTransT& __first, const _StrTransT& __last, + const _StrTransT& __s) const + { return __first <= __s && __s <= __last; } + + protected: + bool _M_in_range_icase(_CharT __first, _CharT __last, _CharT __ch) const + { + typedef std::ctype<_CharT> __ctype_type; + const auto& __fctyp = use_facet<__ctype_type>(this->_M_traits.getloc()); + auto __lower = __fctyp.tolower(__ch); + auto __upper = __fctyp.toupper(__ch); + return (__first <= __lower && __lower <= __last) + || (__first <= __upper && __upper <= __last); + } + + const _TraitsT& _M_traits; + }; + + template<typename _TraitsT, bool __icase, bool __collate> + class _RegexTranslator + : public _RegexTranslatorBase<_TraitsT, __icase, __collate> + { + public: + typedef _RegexTranslatorBase<_TraitsT, __icase, __collate> _Base; + using _Base::_Base; + }; + + template<typename _TraitsT, bool __icase> + class _RegexTranslator<_TraitsT, __icase, false> + : public _RegexTranslatorBase<_TraitsT, __icase, false> + { + public: + typedef _RegexTranslatorBase<_TraitsT, __icase, false> _Base; + typedef typename _Base::_CharT _CharT; + typedef _CharT _StrTransT; + + using _Base::_Base; + _StrTransT - _M_transform_impl(_CharT __ch, false_type) const + _M_transform(_CharT __ch) const { return __ch; } - _StrTransT - _M_transform_impl(_CharT __ch, true_type) const + bool + _M_match_range(_CharT __first, _CharT __last, _CharT __ch) const { - _StrTransT __str = _StrTransT(1, _M_translate(__ch)); - return _M_traits.transform(__str.begin(), __str.end()); + if (!__icase) + return __first <= __ch && __ch <= __last; + return this->_M_in_range_icase(__first, __last, __ch); } + }; - const _TraitsT& _M_traits; + template<typename _CharType> + class _RegexTranslator<std::regex_traits<_CharType>, true, true> + : public _RegexTranslatorBase<std::regex_traits<_CharType>, true, true> + { + public: + typedef _RegexTranslatorBase<std::regex_traits<_CharType>, true, true> + _Base; + typedef typename _Base::_CharT _CharT; 
+ typedef typename _Base::_StrTransT _StrTransT; + + using _Base::_Base; + + bool + _M_match_range(const _StrTransT& __first, const _StrTransT& __last, + const _StrTransT& __str) const + { + __glibcxx_assert(__first.size() == 1); + __glibcxx_assert(__last.size() == 1); + __glibcxx_assert(__str.size() == 1); + return this->_M_in_range_icase(__first[0], __last[0], __str[0]); + } }; template<typename _TraitsT> @@ -272,6 +342,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _StrTransT _M_transform(_CharT __ch) const { return __ch; } + + bool + _M_match_range(_CharT __first, _CharT __last, _CharT __ch) const + { return __first <= __ch && __ch <= __last; } }; template<typename _TraitsT, bool __is_ecma, bool __icase, bool __collate> diff --git a/libstdc++-v3/include/bits/regex_compiler.tcc b/libstdc++-v3/include/bits/regex_compiler.tcc index ef6ebdd..07e56d5 100644 --- a/libstdc++-v3/include/bits/regex_compiler.tcc +++ b/libstdc++-v3/include/bits/regex_compiler.tcc @@ -612,37 +612,26 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _BracketMatcher<_TraitsT, __icase, __collate>:: _M_apply(_CharT __ch, false_type) const { - bool __ret = std::binary_search(_M_char_set.begin(), _M_char_set.end(), - _M_translator._M_translate(__ch)); - if (!__ret) - { - auto __s = _M_translator._M_transform(__ch); - for (auto& __it : _M_range_set) - if (__it.first <= __s && __s <= __it.second) - { - __ret = true; - break; - } - if (_M_traits.isctype(__ch, _M_class_set)) - __ret = true; - else if (std::find(_M_equiv_set.begin(), _M_equiv_set.end(), - _M_traits.transform_primary(&__ch, &__ch+1)) - != _M_equiv_set.end()) - __ret = true; - else - { - for (auto& __it : _M_neg_class_set) - if (!_M_traits.isctype(__ch, __it)) - { - __ret = true; - break; - } - } - } - if (_M_is_non_matching) - return !__ret; - else - return __ret; + return [this, __ch] + { + if (std::binary_search(_M_char_set.begin(), _M_char_set.end(), + _M_translator._M_translate(__ch))) + return true; + auto __s = _M_translator._M_transform(__ch); + for 
(auto& __it : _M_range_set) + if (_M_translator._M_match_range(__it.first, __it.second, __s)) + return true; + if (_M_traits.isctype(__ch, _M_class_set)) + return true; + if (std::find(_M_equiv_set.begin(), _M_equiv_set.end(), + _M_traits.transform_primary(&__ch, &__ch+1)) + != _M_equiv_set.end()) + return true; + for (auto& __it : _M_neg_class_set) + if (!_M_traits.isctype(__ch, __it)) + return true; + return false; + }() ^ _M_is_non_matching; } _GLIBCXX_END_NAMESPACE_VERSION diff --git a/libstdc++-v3/testsuite/28_regex/traits/char/icase.cc b/libstdc++-v3/testsuite/28_regex/traits/char/icase.cc new file mode 100644 index 0000000..97bbd07 --- /dev/null +++ b/libstdc++-v3/testsuite/28_regex/traits/char/icase.cc @@ -0,0 +1,74 @@ +// { dg-do run { target c++11 } } + +// +// Copyright (C) 2016 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING3. If not see +// <http://www.gnu.org/licenses/>. 
+ +// 28.7 Class template regex_traits [re.traits] + +#include <regex> +#include <testsuite_hooks.h> + +using namespace std; + +void +test01() +{ + { + regex re("[T-f]", regex::icase); + + VERIFY(regex_match("A", re)); + VERIFY(regex_match("F", re)); + VERIFY(regex_match("a", re)); + VERIFY(regex_match("f", re)); + + VERIFY(!regex_match("G", re)); + VERIFY(!regex_match("S", re)); + VERIFY(!regex_match("g", re)); + VERIFY(!regex_match("s", re)); + + VERIFY(regex_match("T", re)); + VERIFY(regex_match("Z", re)); + VERIFY(regex_match("t", re)); + VERIFY(regex_match("z", re)); + } + // icase works with std::regex_traits<>, because we know how it's implemented. + { + regex re("[T-f]", regex::icase | regex::collate); + + VERIFY(regex_match("A", re)); + VERIFY(regex_match("F", re)); + VERIFY(regex_match("a", re)); + VERIFY(regex_match("f", re)); + + VERIFY(!regex_match("G", re)); + VERIFY(!regex_match("S", re)); + VERIFY(!regex_match("g", re)); + VERIFY(!regex_match("s", re)); + + VERIFY(regex_match("T", re)); + VERIFY(regex_match("Z", re)); + VERIFY(regex_match("t", re)); + VERIFY(regex_match("z", re)); + } +} + +int main() +{ + test01(); + return 0; +} diff --git a/libstdc++-v3/testsuite/28_regex/traits/char/user_defined.cc b/libstdc++-v3/testsuite/28_regex/traits/char/user_defined.cc index 4af0563..5888ce1 100644 --- a/libstdc++-v3/testsuite/28_regex/traits/char/user_defined.cc +++ b/libstdc++-v3/testsuite/28_regex/traits/char/user_defined.cc @@ -30,6 +30,9 @@ using namespace std; +bool called_transform = false; +bool called_nocase = false; + template<typename CharT> class MyRegexTraits : public regex_traits<CharT> @@ -40,14 +43,71 @@ template<typename CharT> { return c+1; } + + CharT + translate_nocase(CharT c) const + { + called_nocase = true; + return regex_traits<CharT>::translate_nocase(c); + } + + template<typename FwdIt> + basic_string<CharT> + transform(FwdIt begin, FwdIt end) const + { + called_transform = true; + return regex_traits<CharT>::transform(begin, 
end); + } }; void test01() { - basic_regex<char, MyRegexTraits<char>> re("."); - VERIFY(!regex_match("\n", re)); - VERIFY(!regex_match("\r", re)); + { + basic_regex<char, MyRegexTraits<char>> re("."); + VERIFY(!regex_match("\n", re)); + VERIFY(!regex_match("\r", re)); + } + { + VERIFY(!called_transform); + basic_regex<char, MyRegexTraits<char>> re("[a]", regex::collate); + VERIFY(regex_match("a", re)); + VERIFY(exchange(called_transform, false)); + } + { + VERIFY(!called_nocase); + basic_regex<char, MyRegexTraits<char>> re("[a]", regex::icase); + VERIFY(regex_match("A", re)); + VERIFY(exchange(called_nocase, false)); + } + { + basic_regex<char, MyRegexTraits<char>> re("[T-f]", regex::icase); + VERIFY(regex_match("A", re)); + VERIFY(regex_match("F", re)); + VERIFY(regex_match("a", re)); + VERIFY(regex_match("f", re)); + + VERIFY(!regex_match("G", re)); + VERIFY(!regex_match("S", re)); + VERIFY(!regex_match("g", re)); + VERIFY(!regex_match("s", re)); + + VERIFY(regex_match("T", re)); + VERIFY(regex_match("Z", re)); + VERIFY(regex_match("t", re)); + VERIFY(regex_match("z", re)); + } + // icase doesn't participate with the presence of collate and user-defined traits. + { + basic_regex<char, MyRegexTraits<char>> re("[T-f]", regex::icase | regex::collate); + VERIFY(!regex_match("A", re)); + VERIFY(!regex_match("S", re)); + VERIFY(regex_match("T", re)); + VERIFY(regex_match("Z", re)); + VERIFY(regex_match("a", re)); + VERIFY(regex_match("f", re)); + VERIFY(!regex_match("g", re)); + } } int main() |