aboutsummaryrefslogtreecommitdiff
path: root/libstdc++-v3/testsuite/22_locale
diff options
context:
space:
mode:
author Jonathan Wakely <jwakely@redhat.com> 2017-03-17 19:28:05 +0000
committer Jonathan Wakely <redi@gcc.gnu.org> 2017-03-17 19:28:05 +0000
commit d951e75dfe83b86dd2c46c7835e03bbf04b29278 (patch)
tree 0e22d701b0437cb98ff145680df2d6dab207e8bb /libstdc++-v3/testsuite/22_locale
parent d1a73b0baead836a8d813a6a63459ef87a270bba (diff)
download gcc-d951e75dfe83b86dd2c46c7835e03bbf04b29278.zip
gcc-d951e75dfe83b86dd2c46c7835e03bbf04b29278.tar.gz
gcc-d951e75dfe83b86dd2c46c7835e03bbf04b29278.tar.bz2
Fix alignment bugs in std::codecvt_utf16
* src/c++11/codecvt.cc (range): Add non-type template parameter and define overloaded operators for reading and writing code units. (range<Elem, false>): Define partial specialization for accessing wide characters in potentially unaligned byte ranges. (ucs2_span(const char16_t*, const char16_t*, ...)) (ucs4_span(const char16_t*, const char16_t*, ...)): Change parameters to range<const char16_t, false> in order to avoid unaligned reads. (__codecvt_utf16_base<char16_t>::do_out) (__codecvt_utf16_base<char32_t>::do_out) (__codecvt_utf16_base<wchar_t>::do_out): Use range specialization for unaligned data to avoid unaligned writes. (__codecvt_utf16_base<char16_t>::do_in) (__codecvt_utf16_base<char32_t>::do_in) (__codecvt_utf16_base<wchar_t>::do_in): Likewise for reads. Return error if there are unprocessable trailing bytes. (__codecvt_utf16_base<char16_t>::do_length) (__codecvt_utf16_base<char32_t>::do_length) (__codecvt_utf16_base<wchar_t>::do_length): Pass arguments of type range<const char16_t, false> to span functions. * testsuite/22_locale/codecvt/codecvt_utf16/misaligned.cc: New test. From-SVN: r246245
Diffstat (limited to 'libstdc++-v3/testsuite/22_locale')
-rw-r--r-- libstdc++-v3/testsuite/22_locale/codecvt/codecvt_utf16/79980.cc 27
-rw-r--r-- libstdc++-v3/testsuite/22_locale/codecvt/codecvt_utf16/misaligned.cc 289
2 files changed, 316 insertions, 0 deletions
diff --git a/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_utf16/79980.cc b/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_utf16/79980.cc
index 9383818..d8b9729 100644
--- a/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_utf16/79980.cc
+++ b/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_utf16/79980.cc
@@ -103,6 +103,31 @@ test07()
VERIFY( conv.converted() == 5 );
}
+void
+test08() // char16_t round trip (from_bytes / to_bytes) on bytes that start at src + 1
+{
+ // Read/write UTF-16 code units from data not correctly aligned for char16_t
+ Conv<char16_t, 0x10FFFF, std::generate_header> conv; // NOTE(review): Conv is declared outside this hunk; presumably a wstring_convert alias - confirm
+ const char src[] = "-\xFE\xFF\0\x61\xAB\xCD"; // '-' pad byte, then FE FF, then units 0x0061 and 0xABCD (high byte first)
+ auto out = conv.from_bytes(src + 1, src + 7); // the six data bytes; pad byte and terminating NUL excluded
+ VERIFY( out[0] == 0x0061 ); // first converted unit is already 0x0061, i.e. FE FF is not part of out
+ VERIFY( out[1] == 0xabcd );
+ auto bytes = conv.to_bytes(out);
+ VERIFY( bytes == std::string(src + 1, 6) ); // converting back must reproduce all six bytes, FE FF included
+}
+
+void
+test09() // char32_t round trip (from_bytes / to_bytes) of a surrogate pair starting at src + 1
+{
+ // Read/write UTF-16 code units from data not correctly aligned for char16_t
+ Conv<char32_t, 0x10FFFF, std::generate_header> conv; // NOTE(review): Conv is declared outside this hunk; presumably a wstring_convert alias - confirm
+ const char src[] = "-\xFE\xFF\xD8\x08\xDF\x45"; // '-' pad byte, then FE FF, then surrogate pair D808 DF45
+ auto out = conv.from_bytes(src + 1, src + 7); // the six data bytes; pad byte and terminating NUL excluded
+ VERIFY( out == U"\U00012345" ); // the pair decodes to the single code point U+12345
+ auto bytes = conv.to_bytes(out);
+ VERIFY( bytes == std::string(src + 1, 6) ); // converting back must reproduce all six bytes, FE FF included
+}
+
int main()
{
test01();
@@ -112,4 +137,6 @@ int main()
test05();
test06();
test07();
+ test08();
+ test09();
}
diff --git a/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_utf16/misaligned.cc b/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_utf16/misaligned.cc
new file mode 100644
index 0000000..0179c18
--- /dev/null
+++ b/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_utf16/misaligned.cc
@@ -0,0 +1,289 @@
+// Copyright (C) 2017 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3. If not see
+// <http://www.gnu.org/licenses/>.
+
+// { dg-do run { target c++11 } }
+
+#include <locale>
+#include <codecvt>
+#include <testsuite_hooks.h>
+
+using std::codecvt_base;
+using std::codecvt_mode;
+using std::codecvt_utf16;
+using std::wstring_convert;
+using std::mbstate_t;
+
+constexpr codecvt_mode
+operator|(codecvt_mode m1, codecvt_mode m2) // bitwise OR of two codecvt_mode values, returned as codecvt_mode
+{
+ using underlying = std::underlying_type<codecvt_mode>::type; // integer type the OR is computed in
+ return static_cast<codecvt_mode>(static_cast<underlying>(m1) | m2); // only m1 is cast explicitly; the OR result is cast back to the enum
+}
+
+// Read/write UTF-16 code units from data not correctly aligned for char16_t
+
+void
+test01() // codecvt_utf16<char16_t>: length/in/out on a byte range that starts at src + 1
+{
+ mbstate_t st; // handed to every call below; this test never inspects it
+ constexpr codecvt_mode m = std::consume_header|std::generate_header; // combined with the operator| overload defined in this file
+ codecvt_utf16<char16_t, 0x10FFFF, m> conv;
+ const char src[] = "-\xFE\xFF\0\x61\xAB\xCD"; // '-' pad byte, then BOM FE FF, then units 0x0061 and 0xABCD (high byte first)
+ const char* const src_end = src + 7; // one past the last data byte; the literal's terminating NUL is excluded
+
+ int len = conv.length(st, src + 1, src_end, 1);
+ VERIFY( len == 4 ); // 2 BOM bytes + one 2-byte code unit
+ len = conv.length(st, src + 1, src_end, 2);
+ VERIFY( len == 6 ); // 2 BOM bytes + two 2-byte code units
+
+ char16_t dst[2];
+ char16_t* const dst_end = dst + 2;
+ char16_t* dst_next;
+ const char* src_cnext;
+ auto res = conv.in(st, src + 1, src_end, src_cnext, dst, dst_end, dst_next); // decode bytes -> char16_t
+ VERIFY( res == codecvt_base::ok );
+ VERIFY( dst[0] == 0x0061 ); // the BOM is not stored in dst
+ VERIFY( dst[1] == 0xabcd );
+ VERIFY( src_cnext == src_end ); // all input consumed
+ VERIFY( dst_next == dst_end ); // output buffer exactly filled
+
+ char out[sizeof(src)] = { src[0] }; // same layout as src: pad byte first, remaining bytes zero-initialized
+ char* const out_end = out + 7;
+ char* out_next;
+ const char16_t* dst_cnext;
+ res = conv.out(st, dst, dst_end, dst_cnext, out + 1, out_end, out_next); // encode back, again writing from offset 1
+ VERIFY( res == codecvt_base::ok );
+ VERIFY( out_next == out_end );
+ VERIFY( dst_cnext == dst_end );
+ VERIFY( out[1] == src[1] ); // the round trip must reproduce the BOM ...
+ VERIFY( out[2] == src[2] );
+ VERIFY( out[3] == src[3] ); // ... and both code units, byte for byte
+ VERIFY( out[4] == src[4] );
+ VERIFY( out[5] == src[5] );
+ VERIFY( out[6] == src[6] );
+
+ codecvt_utf16<char16_t, 0x10FFFF, m|std::little_endian> conv_le; // repeat the checks with little_endian added to the mode
+
+ len = conv_le.length(st, src + 1, src_end, 1);
+ VERIFY( len == 4 ); // same byte counts as with conv
+ len = conv_le.length(st, src + 1, src_end, 2);
+ VERIFY( len == 6 );
+
+ res = conv_le.in(st, src + 1, src_end, src_cnext, dst, dst_end, dst_next); // input is still the FE FF-prefixed src
+ VERIFY( res == codecvt_base::ok );
+ VERIFY( dst[0] == 0x0061 ); // same decoded values as with conv
+ VERIFY( dst[1] == 0xabcd );
+ VERIFY( src_cnext == src_end );
+ VERIFY( dst_next == dst_end );
+
+ res = conv_le.out(st, dst, dst_end, dst_cnext, out + 1, out_end, out_next);
+ VERIFY( res == codecvt_base::ok );
+ VERIFY( out_next == out_end );
+ VERIFY( dst_cnext == dst_end );
+ VERIFY( out[1] == src[2] ); // output has every byte pair swapped relative to src, BOM included
+ VERIFY( out[2] == src[1] );
+ VERIFY( out[3] == src[4] );
+ VERIFY( out[4] == src[3] );
+ VERIFY( out[5] == src[6] );
+ VERIFY( out[6] == src[5] );
+}
+
+void
+test02() // codecvt_utf16<char32_t>: length/in/out on a byte range that starts at src + 1
+{
+ mbstate_t st; // handed to every call below; this test never inspects it
+ constexpr codecvt_mode m = std::consume_header|std::generate_header; // combined with the operator| overload defined in this file
+ codecvt_utf16<char32_t, 0x10FFFF, m> conv;
+ const char src[] = "-\xFE\xFF\0\x61\xAB\xCD\xD8\x08\xDF\x45"; // '-' pad byte, BOM FE FF, units 0x0061 and 0xABCD, surrogate pair D808 DF45
+ const char* const src_end = src + 11; // one past the last data byte; the literal's terminating NUL is excluded
+
+ int len = conv.length(st, src + 1, src_end, 1);
+ VERIFY( len == 4 ); // 2 BOM bytes + one 2-byte code point
+ len = conv.length(st, src + 1, src_end, 2);
+ VERIFY( len == 6 ); // 2 BOM bytes + two 2-byte code points
+ len = conv.length(st, src + 1, src_end, -1ul); // -1ul: effectively no limit on the count
+ VERIFY( len == 10 ); // whole input: 2 BOM bytes + 2 + 2 + 4 (surrogate pair)
+
+ char32_t dst[3];
+ char32_t* const dst_end = dst + 3;
+ char32_t* dst_next;
+ const char* src_cnext;
+ auto res = conv.in(st, src + 1, src_end, src_cnext, dst, dst_end, dst_next); // decode bytes -> char32_t
+ VERIFY( res == codecvt_base::ok );
+ VERIFY( dst[0] == 0x0061 ); // the BOM is not stored in dst
+ VERIFY( dst[1] == 0xabcd );
+ VERIFY( dst[2] == 0x012345 ); // D808 DF45 combines into one code point
+ VERIFY( src_cnext == src_end ); // all input consumed
+ VERIFY( dst_next == dst_end ); // output buffer exactly filled
+
+ char out[sizeof(src)] = { src[0] }; // same layout as src: pad byte first, remaining bytes zero-initialized
+ char* const out_end = out + 11;
+ char* out_next;
+ const char32_t* dst_cnext;
+ res = conv.out(st, dst, dst_end, dst_cnext, out + 1, out_end, out_next); // encode back, again writing from offset 1
+ VERIFY( res == codecvt_base::ok );
+ VERIFY( out_next == out_end );
+ VERIFY( dst_cnext == dst_end );
+ VERIFY( out[1] == src[1] ); // the round trip must reproduce the BOM ...
+ VERIFY( out[2] == src[2] );
+ VERIFY( out[3] == src[3] ); // ... and every code unit, byte for byte
+ VERIFY( out[4] == src[4] );
+ VERIFY( out[5] == src[5] );
+ VERIFY( out[6] == src[6] );
+ VERIFY( out[7] == src[7] );
+ VERIFY( out[8] == src[8] );
+ VERIFY( out[9] == src[9] );
+ VERIFY( out[10] == src[10] );
+
+ codecvt_utf16<char32_t, 0x10FFFF, m|std::little_endian> conv_le; // repeat the checks with little_endian added to the mode
+
+ len = conv_le.length(st, src + 1, src_end, 1);
+ VERIFY( len == 4 ); // same byte counts as with conv
+ len = conv_le.length(st, src + 1, src_end, 2);
+ VERIFY( len == 6 );
+ len = conv_le.length(st, src + 1, src_end, -1ul); // must query conv_le like its neighbours; calling conv here would only repeat the earlier check
+ VERIFY( len == 10 );
+
+ res = conv_le.in(st, src + 1, src_end, src_cnext, dst, dst_end, dst_next); // input is still the FE FF-prefixed src
+ VERIFY( res == codecvt_base::ok );
+ VERIFY( dst[0] == 0x0061 ); // same decoded values as with conv
+ VERIFY( dst[1] == 0xabcd );
+ VERIFY( dst[2] == 0x012345 );
+ VERIFY( src_cnext == src_end );
+ VERIFY( dst_next == dst_end );
+
+ res = conv_le.out(st, dst, dst_end, dst_cnext, out + 1, out_end, out_next);
+ VERIFY( res == codecvt_base::ok );
+ VERIFY( out_next == out_end );
+ VERIFY( dst_cnext == dst_end );
+ VERIFY( out[1] == src[2] ); // output has every byte pair swapped relative to src, BOM included
+ VERIFY( out[2] == src[1] );
+ VERIFY( out[3] == src[4] );
+ VERIFY( out[4] == src[3] );
+ VERIFY( out[5] == src[6] );
+ VERIFY( out[6] == src[5] );
+ VERIFY( out[7] == src[8] );
+ VERIFY( out[8] == src[7] );
+ VERIFY( out[9] == src[10] );
+ VERIFY( out[10] == src[9] );
+}
+
+void
+test03() // codecvt_utf16<wchar_t>: length/in/out on a byte range that starts at src + 1
+{
+#ifdef _GLIBCXX_USE_WCHAR_T
+ mbstate_t st; // handed to every call below; this test never inspects it
+ constexpr codecvt_mode m = std::consume_header|std::generate_header; // combined with the operator| overload defined in this file
+ codecvt_utf16<wchar_t, 0x10FFFF, m> conv;
+ const char src[] = "-\xFE\xFF\0\x61\xAB\xCD\xD8\x08\xDF\x45"; // '-' pad byte, BOM FE FF, units 0x0061 and 0xABCD, surrogate pair D808 DF45
+ const size_t in_len = sizeof(wchar_t) == 4 ? 11 : 7; // the surrogate-pair bytes are only used when wchar_t is 4 bytes
+ const size_t out_len = sizeof(wchar_t) == 4 ? 3 : 2; // number of wchar_t values expected from the bytes in use
+ const char* const src_end = src + in_len;
+
+ int len = conv.length(st, src + 1, src_end, 1);
+ VERIFY( len == 4 ); // 2 BOM bytes + one 2-byte code unit
+ len = conv.length(st, src + 1, src_end, 2);
+ VERIFY( len == 6 ); // 2 BOM bytes + two 2-byte code units
+ if (sizeof(wchar_t) == 4)
+ {
+ len = conv.length(st, src + 1, src_end, -1ul); // -1ul: effectively no limit on the count
+ VERIFY( len == 10 ); // whole input: 2 BOM bytes + 2 + 2 + 4 (surrogate pair)
+ }
+
+ wchar_t dst[out_len];
+ wchar_t* const dst_end = dst + out_len;
+ wchar_t* dst_next;
+ const char* src_cnext;
+ auto res = conv.in(st, src + 1, src_end, src_cnext, dst, dst_end, dst_next); // decode bytes -> wchar_t
+ VERIFY( res == codecvt_base::ok );
+ VERIFY( dst[0] == 0x0061 ); // the BOM is not stored in dst
+ VERIFY( dst[1] == 0xabcd );
+ if (sizeof(wchar_t) == 4)
+ VERIFY( dst[2] == 0x012345 ); // D808 DF45 combines into one code point
+ VERIFY( src_cnext == src_end ); // all input consumed
+ VERIFY( dst_next == dst_end ); // output buffer exactly filled
+
+ char out[sizeof(src)] = { src[0] }; // same layout as src: pad byte first, remaining bytes zero-initialized
+ char* const out_end = out + in_len;
+ char* out_next;
+ const wchar_t* dst_cnext;
+ res = conv.out(st, dst, dst_end, dst_cnext, out + 1, out_end, out_next); // encode back, again writing from offset 1
+ VERIFY( res == codecvt_base::ok );
+ VERIFY( out_next == out_end );
+ VERIFY( dst_cnext == dst_end );
+ VERIFY( out[1] == src[1] ); // the round trip must reproduce the BOM ...
+ VERIFY( out[2] == src[2] );
+ VERIFY( out[3] == src[3] ); // ... and every code unit, byte for byte
+ VERIFY( out[4] == src[4] );
+ VERIFY( out[5] == src[5] );
+ VERIFY( out[6] == src[6] );
+ if (sizeof(wchar_t) == 4)
+ {
+ VERIFY( out[7] == src[7] ); // surrogate-pair bytes, only produced for 4-byte wchar_t
+ VERIFY( out[8] == src[8] );
+ VERIFY( out[9] == src[9] );
+ VERIFY( out[10] == src[10] );
+ }
+
+ codecvt_utf16<wchar_t, 0x10FFFF, m|std::little_endian> conv_le; // repeat the checks with little_endian added to the mode
+
+ len = conv_le.length(st, src + 1, src_end, 1);
+ VERIFY( len == 4 ); // same byte counts as with conv
+ len = conv_le.length(st, src + 1, src_end, 2);
+ VERIFY( len == 6 );
+ if (sizeof(wchar_t) == 4)
+ {
+ len = conv_le.length(st, src + 1, src_end, -1ul); // must query conv_le like its neighbours; calling conv here would only repeat the earlier check
+ VERIFY( len == 10 );
+ }
+
+ res = conv_le.in(st, src + 1, src_end, src_cnext, dst, dst_end, dst_next); // input is still the FE FF-prefixed src
+ VERIFY( res == codecvt_base::ok );
+ VERIFY( dst[0] == 0x0061 ); // same decoded values as with conv
+ VERIFY( dst[1] == 0xabcd );
+ if (sizeof(wchar_t) == 4)
+ VERIFY( dst[2] == 0x012345 );
+ VERIFY( src_cnext == src_end );
+ VERIFY( dst_next == dst_end );
+
+ res = conv_le.out(st, dst, dst_end, dst_cnext, out + 1, out_end, out_next);
+ VERIFY( res == codecvt_base::ok );
+ VERIFY( out_next == out_end );
+ VERIFY( dst_cnext == dst_end );
+ VERIFY( out[1] == src[2] ); // output has every byte pair swapped relative to src, BOM included
+ VERIFY( out[2] == src[1] );
+ VERIFY( out[3] == src[4] );
+ VERIFY( out[4] == src[3] );
+ VERIFY( out[5] == src[6] );
+ VERIFY( out[6] == src[5] );
+ if (sizeof(wchar_t) == 4)
+ {
+ VERIFY( out[7] == src[8] ); // surrogate-pair bytes, only produced for 4-byte wchar_t
+ VERIFY( out[8] == src[7] );
+ VERIFY( out[9] == src[10] );
+ VERIFY( out[10] == src[9] );
+ }
+#endif
+}
+
+int
+main() // runs the three misaligned-buffer tests in order
+{
+ test01(); // char16_t
+ test02(); // char32_t
+ test03(); // wchar_t; body is compiled only when _GLIBCXX_USE_WCHAR_T is defined
+}