diff options
author | Paolo Carlini <pcarlini@suse.de> | 2003-11-24 21:13:38 +0000 |
---|---|---|
committer | Paolo Carlini <paolo@gcc.gnu.org> | 2003-11-24 21:13:38 +0000 |
commit | 3c8797ae81937ef87756d24c2420e1f474ac0cd8 (patch) | |
tree | 791bfd27224a08bac1cf8abd7aa941c454e59645 | |
parent | e0ec69c929263d757b83289e79a4236d4951351a (diff) | |
download | gcc-3c8797ae81937ef87756d24c2420e1f474ac0cd8.zip gcc-3c8797ae81937ef87756d24c2420e1f474ac0cd8.tar.gz gcc-3c8797ae81937ef87756d24c2420e1f474ac0cd8.tar.bz2 |
codecvt_members.cc (codecvt::do_in): Rewrite, using both mbsnrtowcs and mbrtowc in a loop...
2003-11-24 Paolo Carlini <pcarlini@suse.de>
* config/locale/gnu/codecvt_members.cc (codecvt::do_in):
Rewrite, using both mbsnrtowcs and mbrtowc in a loop: the
former is very fast, but stops if it encounters a NUL.
(codecvt::do_out): Tweak.
* testsuite/performance/wchar_t_in.cc: New.
2003-11-24 Paolo Carlini <pcarlini@suse.de>
* include/bits/locale_facets.h
(__numpunct_cache<>::_M_cache): Set _M_allocated at the beginning
in order to avoid memory leaks; simplify a bit.
From-SVN: r73893
-rw-r--r-- | libstdc++-v3/ChangeLog | 14 | ||||
-rw-r--r-- | libstdc++-v3/config/locale/gnu/codecvt_members.cc | 73 | ||||
-rw-r--r-- | libstdc++-v3/include/bits/locale_facets.h | 43 | ||||
-rw-r--r-- | libstdc++-v3/testsuite/performance/wchar_t_in.cc | 84 |
4 files changed, 164 insertions, 50 deletions
diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog index 9ff983e..9826ee4 100644 --- a/libstdc++-v3/ChangeLog +++ b/libstdc++-v3/ChangeLog @@ -1,3 +1,17 @@ +2003-11-24 Paolo Carlini <pcarlini@suse.de> + + * config/locale/gnu/codecvt_members.cc (codecvt::do_in): + Rewrite, using both mbsnrtowcs and mbrtowc in a loop: the + former is very fast, but stops if encounters a NUL. + (codecvt::do_out): Tweak. + * testsuite/performance/wchar_t_in.cc: New. + +2003-11-24 Paolo Carlini <pcarlini@suse.de> + + * include/bits/locale_facets.h + (__numpunct_cache<>::_M_cache): Set _M_allocated at the beginning + in order to avoid memory leaks; simplify a bit. + 2003-11-24 Benjamin Kosnik <bkoz@redhat.com> * include/bits/streambuf_iterator.h (ostreambuf_iterator): Remove diff --git a/libstdc++-v3/config/locale/gnu/codecvt_members.cc b/libstdc++-v3/config/locale/gnu/codecvt_members.cc index 708941a..daadc31 100644 --- a/libstdc++-v3/config/locale/gnu/codecvt_members.cc +++ b/libstdc++-v3/config/locale/gnu/codecvt_members.cc @@ -97,9 +97,7 @@ namespace std extern_type __buf[MB_LEN_MAX]; __tmp_state = __state; const size_t __conv = wcrtomb(__buf, *__from_next, &__tmp_state); - if (__conv == static_cast<size_t>(-1)) - __ret = error; - else if (__conv > static_cast<size_t>(__to_end - __to_next)) + if (__conv > static_cast<size_t>(__to_end - __to_next)) __ret = partial; else { @@ -126,54 +124,71 @@ namespace std intern_type*& __to_next) const { result __ret = ok; - // This temporary state object is neccessary so __state won't be modified - // if [__from, __from_end) is a partial multibyte character. state_type __tmp_state(__state); + #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2) __c_locale __old = __uselocale(_M_c_locale_codecvt); #endif - // Conversion must be done by calling mbrtowc in a loop rather than - // by calling mbsrtowcs because mbsrtowcs assumes that the input - // sequence is zero-terminated. 
- while (__from < __from_end && __to < __to_end) + // mbsnrtowcs is *very* fast but stops if encounters NUL characters: + // in case we store a L'\0' and then continue, in a loop. + // NB: mbsnrtowcs is a GNU extension + __from_next = __from; + __to_next = __to; + while (__from_next < __from_end && __to_next < __to_end + && __ret == ok) { - size_t __conv = mbrtowc(__to, __from, __from_end - __from, - &__tmp_state); + const extern_type* __from_chunk_end; + __from_chunk_end = static_cast<const extern_type*>(memchr(__from_next, '\0', + __from_end + - __from_next)); + if (!__from_chunk_end) + __from_chunk_end = __from_end; + + const extern_type* __tmp_from = __from_next; + const size_t __conv = mbsnrtowcs(__to_next, &__from_next, + __from_chunk_end - __from_next, + __to_end - __to_next, &__state); if (__conv == static_cast<size_t>(-1)) { + // In case of error, in order to stop at the exact place we + // have to start again from the beginning with a series of + // mbrtowc. + while (__tmp_from < __from_next) + __tmp_from += mbrtowc(__to_next++, __tmp_from, + __from_next - __tmp_from, &__tmp_state); + __state = __tmp_state; __ret = error; - break; } - else if (__conv == static_cast<size_t>(-2)) + else if (__from_next && __from_next < __from_chunk_end) { - // It is unclear what to return in this case (see DR 382). + // It is unclear what to return in this case (see DR 382). 
+ __to_next += __conv; __ret = partial; - break; } - else if (__conv == 0) + else { - // XXX Probably wrong for stateful encodings - __conv = 1; - *__to = L'\0'; + __from_next = __from_chunk_end; + __to_next += __conv; } - __state = __tmp_state; - __to++; - __from += __conv; + if (__from_next < __from_end && __ret == ok) + { + if (__to_next < __to_end) + { + // XXX Probably wrong for stateful encodings + ++__from_next; + *__to_next++ = L'\0'; + } + else + __ret = partial; + } } #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2) __uselocale(__old); #endif - // It is not clear that __from < __from_end implies __ret != ok - // (see DR 382). - if (__ret == ok && __from < __from_end) - __ret = partial; - - __from_next = __from; - __to_next = __to; return __ret; } diff --git a/libstdc++-v3/include/bits/locale_facets.h b/libstdc++-v3/include/bits/locale_facets.h index a433e6c..e6ad5eb 100644 --- a/libstdc++-v3/include/bits/locale_facets.h +++ b/libstdc++-v3/include/bits/locale_facets.h @@ -647,32 +647,35 @@ namespace std void __numpunct_cache<_CharT>::_M_cache(const locale& __loc) { + _M_allocated = true; + _M_grouping = NULL; + _M_truename = _M_falsename = NULL; + const numpunct<_CharT>& __np = use_facet<numpunct<_CharT> >(__loc); - string __grouping = __np.grouping(); - char* __group = new char[__grouping.length() + 1]; - __grouping.copy(__group, __grouping.length()); - __group[__grouping.length()] = char(); - _M_grouping = __group; - - _M_use_grouping = __grouping.length() != 0 && __grouping.data()[0] != 0; - typedef basic_string<_CharT> __string_type; + const string::size_type __len = __np.grouping().size(); + char* __grouping = new char[__len + 1]; + __np.grouping().copy(__grouping, __len); + __grouping[__len] = char(); + _M_grouping = __grouping; + _M_use_grouping = __len && __np.grouping()[0] != 0; - __string_type __true = __np.truename(); - _CharT* __truename = new _CharT[__true.length() + 1]; - __true.copy(__truename, __true.length()); - 
__truename[__true.length()] = _CharT(); + typedef basic_string<_CharT> __string_type; + typename __string_type::size_type __lentf = __np.truename().size(); + _CharT* __truename = new _CharT[__lentf + 1]; + __np.truename().copy(__truename, __lentf); + __truename[__lentf] = _CharT(); _M_truename = __truename; - - __string_type __false = __np.falsename(); - _CharT* __falsename = new _CharT[__false.length() + 1]; - __false.copy(__falsename, __false.length()); - __falsename[__false.length()] = _CharT(); + + __lentf = __np.falsename().size(); + _CharT* __falsename = new _CharT[__lentf + 1]; + __np.falsename().copy(__falsename, __lentf); + __falsename[__lentf] = _CharT(); _M_falsename = __falsename; - + _M_decimal_point = __np.decimal_point(); _M_thousands_sep = __np.thousands_sep(); - + const ctype<_CharT>& __ct = use_facet<ctype<_CharT> >(__loc); __ct.widen(__num_base::_S_atoms_out, __num_base::_S_atoms_out + __num_base::_S_oend, _M_atoms_out); @@ -680,8 +683,6 @@ namespace std __ct.widen(__num_base::_S_atoms_in, __num_base::_S_atoms_in + __num_base::_S_iend, _M_atoms_in); _M_atoms_in[__num_base::_S_iend] = _CharT(); - - _M_allocated = true; } template<typename _CharT> diff --git a/libstdc++-v3/testsuite/performance/wchar_t_in.cc b/libstdc++-v3/testsuite/performance/wchar_t_in.cc new file mode 100644 index 0000000..43e09e3 --- /dev/null +++ b/libstdc++-v3/testsuite/performance/wchar_t_in.cc @@ -0,0 +1,84 @@ +// Copyright (C) 2003 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 2, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING. If not, write to the Free +// Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, +// USA. + +// As a special exception, you may use this file as part of a free software +// library without restriction. Specifically, if other files instantiate +// templates or use macros or inline functions from this file, or you compile +// this file and link it with other files to produce an executable, this +// file does not by itself cause the resulting executable to be covered by +// the GNU General Public License. This exception does not however +// invalidate any other reasons why the executable file might be covered by +// the GNU General Public License. + +#include <cstdio> +#include <cstring> +#include <fstream> +#include <langinfo.h> +#include <iconv.h> +#include <testsuite_performance.h> + +// libstdc++/11602 (do_in) +int main(int argc, char** argv) +{ + using namespace std; + using namespace __gnu_test; + + time_counter time; + resource_counter resource; + const int iters = 400000; + + wchar_t wbuf[1024]; + char cbuf[1024]; + + memset(cbuf, 'a', 1024); + + // C (iconv) + iconv_t cd = iconv_open("WCHAR_T", nl_langinfo(CODESET)); + start_counters(time, resource); + for (int i = 0; i < iters; ++i) + { + size_t inbytesleft = 1024; + size_t outbytesleft = 1024 * sizeof(wchar_t); + char* in = cbuf; + char* out = reinterpret_cast<char*>(wbuf); + iconv(cd, &in, &inbytesleft, &out, &outbytesleft); + } + stop_counters(time, resource); + iconv_close(cd); + report_performance(__FILE__, "C (iconv)", time, resource); + clear_counters(time, resource); + + // C++ (codecvt) + locale loc; + const codecvt<wchar_t, char, mbstate_t>& cvt = + use_facet<codecvt<wchar_t, char, mbstate_t> >(loc); + mbstate_t state; + memset(&state, 0, sizeof(state)); + start_counters(time, resource); + for (int i 
= 0; i < iters; ++i) + { + const char* from_next; + wchar_t* to_next; + cvt.in(state, cbuf, cbuf + 1024, from_next, + wbuf, wbuf + 1024, to_next); + } + stop_counters(time, resource); + report_performance(__FILE__, "C++ (codecvt)", time, resource); + + return 0; +} |