aboutsummaryrefslogtreecommitdiff
path: root/libstdc++-v3/testsuite/22_locale
diff options
context:
space:
mode:
author Dimitrij Mijoski <dmjpp@hotmail.com> 2023-01-10 13:58:59 +0100
committer Jonathan Wakely <jwakely@redhat.com> 2023-01-13 13:34:20 +0000
commit 02dab998665dda0f6df31740e8897c42de3d740f (patch)
tree 54183d7eb0819f3f09d06817991c223717deac8b /libstdc++-v3/testsuite/22_locale
parent e2fc12a5dafadf15d804e1d2541528296e97a847 (diff)
download gcc-02dab998665dda0f6df31740e8897c42de3d740f.zip
gcc-02dab998665dda0f6df31740e8897c42de3d740f.tar.gz
gcc-02dab998665dda0f6df31740e8897c42de3d740f.tar.bz2
libstdc++: Fix Unicode codecvt and add tests [PR86419]
Fixes the conversion from UTF-8 to UTF-16 to properly return partial instead of ok. Fixes the conversion from UTF-16 to UTF-8 to properly return partial instead of ok. Fixes the conversion from UTF-8 to UCS-2 to properly return partial instead of error. Fixes the conversion from UTF-8 to UCS-2 to treat 4-byte UTF-8 sequences as an error just by seeing the leading byte. Fixes UTF-8 decoding for all codecvts so they detect an error at the end of the input range when the last code point is also incomplete. libstdc++-v3/ChangeLog: PR libstdc++/86419 * src/c++11/codecvt.cc (read_utf8_code_point): Correctly detect errors in incomplete multibyte sequences. (utf16_in): Remove surrogates parameter. Fix conditions for returning partial. (utf16_out): Fix condition for returning partial. (ucs2_in): Do not pass surrogates argument to utf16_in. * testsuite/22_locale/codecvt/codecvt_unicode.cc: New test. * testsuite/22_locale/codecvt/codecvt_unicode.h: New header for tests. * testsuite/22_locale/codecvt/codecvt_unicode_wchar_t.cc: New test.
Diffstat (limited to 'libstdc++-v3/testsuite/22_locale')
-rw-r--r-- libstdc++-v3/testsuite/22_locale/codecvt/codecvt_unicode.cc 68
-rw-r--r-- libstdc++-v3/testsuite/22_locale/codecvt/codecvt_unicode.h 1269
-rw-r--r-- libstdc++-v3/testsuite/22_locale/codecvt/codecvt_unicode_wchar_t.cc 59
3 files changed, 1396 insertions, 0 deletions
diff --git a/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_unicode.cc b/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_unicode.cc
new file mode 100644
index 0000000..ae4b6c8
--- /dev/null
+++ b/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_unicode.cc
@@ -0,0 +1,68 @@
+// Copyright (C) 2020-2023 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3. If not see
+// <http://www.gnu.org/licenses/>.
+
+// { dg-do run { target c++11 } }
+
+#include "codecvt_unicode.h"
+
+#include <codecvt>
+
+using namespace std;
+
+void
+test_utf8_utf32_codecvts ()
+{ // Runs the UTF-8 <-> UTF-32 checks on two facets: the classic locale's codecvt<char32_t, char, mbstate_t> and a codecvt_utf8<char32_t>.
+ using codecvt_c32 = codecvt<char32_t, char, mbstate_t>;
+ auto loc_c = locale::classic ();
+ VERIFY (has_facet<codecvt_c32> (loc_c)); // the facet must be present before use_facet is called
+ auto &cvt = use_facet<codecvt_c32> (loc_c);
+ test_utf8_utf32_codecvts (cvt); // one-argument template overload from codecvt_unicode.h
+
+ auto cvt_ptr = to_unique_ptr (new codecvt_utf8<char32_t> ()); // unique_ptr owns the standalone facet
+ test_utf8_utf32_codecvts (*cvt_ptr);
+}
+
+void
+test_utf8_utf16_codecvts ()
+{ // Runs the UTF-8 <-> UTF-16 checks on three facets: the classic locale's codecvt<char16_t, char, mbstate_t> and codecvt_utf8_utf16 for char16_t and char32_t.
+ using codecvt_c16 = codecvt<char16_t, char, mbstate_t>;
+ auto loc_c = locale::classic ();
+ VERIFY (has_facet<codecvt_c16> (loc_c)); // the facet must be present before use_facet is called
+ auto &cvt = use_facet<codecvt_c16> (loc_c);
+ test_utf8_utf16_cvts (cvt);
+
+ auto cvt_ptr = to_unique_ptr (new codecvt_utf8_utf16<char16_t> ());
+ test_utf8_utf16_cvts (*cvt_ptr);
+
+ auto cvt_ptr2 = to_unique_ptr (new codecvt_utf8_utf16<char32_t> ()); // same checks, with char32_t as the element type
+ test_utf8_utf16_cvts (*cvt_ptr2);
+}
+
+void
+test_utf8_ucs2_codecvts ()
+{ // Runs the UTF-8 <-> UCS-2 checks on a codecvt_utf8<char16_t> facet.
+ auto cvt_ptr = to_unique_ptr (new codecvt_utf8<char16_t> ()); // unique_ptr owns the standalone facet
+ test_utf8_ucs2_cvts (*cvt_ptr);
+}
+
+int
+main ()
+{ // Test entry point: runs the three groups of codecvt checks in sequence.
+ test_utf8_utf32_codecvts ();
+ test_utf8_utf16_codecvts ();
+ test_utf8_ucs2_codecvts ();
+}
diff --git a/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_unicode.h b/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_unicode.h
new file mode 100644
index 0000000..99d1a468
--- /dev/null
+++ b/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_unicode.h
@@ -0,0 +1,1269 @@
+// Copyright (C) 2020-2023 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3. If not see
+// <http://www.gnu.org/licenses/>.
+
+#include <locale>
+#include <string>
+#include <memory>
+#include <testsuite_hooks.h>
+
+template <typename T>
+std::unique_ptr<T>
+to_unique_ptr (T *ptr) // Wraps a raw pointer in a std::unique_ptr<T>, deducing T from the argument.
+{
+ return std::unique_ptr<T> (ptr);
+}
+
+struct test_offsets_ok // One "ok" test case: how much input to supply and how much output room to allow.
+{
+ size_t in_size, out_size; // element counts measured from the start of the input / output arrays
+};
+struct test_offsets_partial // One "partial" test case: buffer sizes plus the offsets at which the conversion is expected to stop.
+{
+ size_t in_size, out_size, expected_in_next, expected_out_next; // expected_*_next are offsets from the array starts that in_next / out_next are compared against
+};
+
+template <class CharT> struct test_offsets_error // One "error" test case; CharT is the type of the input element that gets overwritten.
+{
+ size_t in_size, out_size, expected_in_next, expected_out_next; // buffer sizes and the offsets at which the conversion is expected to stop
+ CharT replace_char; // value written into the input copy to make it invalid
+ size_t replace_pos; // index in the input array where replace_char is written
+};
+
+template <class T, size_t N>
+auto constexpr array_size (const T (&)[N]) -> size_t // Returns the element count N of a built-in array; for a string literal this counts the terminating null.
+{
+ return N;
+}
+
+template <class CharT>
+void
+utf8_to_utf32_in_ok (const std::codecvt<CharT, char, mbstate_t> &cvt)
+{ // Checks that cvt.in returns ok, and advances both pointers to the range ends, when the UTF-8 input ends on a code point boundary.
+ using namespace std;
+ // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
+ const char in[] = "bш\uAAAA\U0010AAAA";
+ const char32_t exp_literal[] = U"bш\uAAAA\U0010AAAA";
+ CharT exp[array_size (exp_literal)] = {}; // expected output, converted to the facet's CharT
+ std::copy (begin (exp_literal), end (exp_literal), begin (exp));
+
+ static_assert (array_size (in) == 11, ""); // 10 UTF-8 bytes plus the terminating null
+ static_assert (array_size (exp_literal) == 5, "");
+ static_assert (array_size (exp) == 5, "");
+ VERIFY (char_traits<char>::length (in) == 10);
+ VERIFY (char_traits<char32_t>::length (exp_literal) == 4);
+ VERIFY (char_traits<CharT>::length (exp) == 4);
+
+ test_offsets_ok offsets[] = {{0, 0}, {1, 1}, {3, 2}, {6, 3}, {10, 4}}; // {UTF-8 bytes supplied, output units expected}
+ for (auto t : offsets) // pass 1: output range is limited to exactly t.out_size elements
+ {
+ CharT out[array_size (exp) - 1] = {};
+ VERIFY (t.in_size <= array_size (in));
+ VERIFY (t.out_size <= array_size (out));
+ auto state = mbstate_t{};
+ auto in_next = (const char *) nullptr;
+ auto out_next = (CharT *) nullptr;
+ auto res = codecvt_base::result ();
+
+ res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size,
+ out_next);
+ VERIFY (res == cvt.ok);
+ VERIFY (in_next == in + t.in_size);
+ VERIFY (out_next == out + t.out_size);
+ VERIFY (char_traits<CharT>::compare (out, exp, t.out_size) == 0);
+ if (t.out_size < array_size (out)) // the element just past the output must still be zero
+ VERIFY (out[t.out_size] == 0);
+ }
+
+ for (auto t : offsets) // pass 2: same inputs, but the whole (larger) output array is offered
+ {
+ CharT out[array_size (exp)] = {};
+ VERIFY (t.in_size <= array_size (in));
+ VERIFY (t.out_size <= array_size (out));
+ auto state = mbstate_t{};
+ auto in_next = (const char *) nullptr;
+ auto out_next = (CharT *) nullptr;
+ auto res = codecvt_base::result ();
+
+ res
+ = cvt.in (state, in, in + t.in_size, in_next, out, end (out), out_next);
+ VERIFY (res == cvt.ok);
+ VERIFY (in_next == in + t.in_size);
+ VERIFY (out_next == out + t.out_size);
+ VERIFY (char_traits<CharT>::compare (out, exp, t.out_size) == 0);
+ if (t.out_size < array_size (out)) // the element just past the output must still be zero
+ VERIFY (out[t.out_size] == 0);
+ }
+}
+
+template <class CharT>
+void
+utf8_to_utf32_in_partial (const std::codecvt<CharT, char, mbstate_t> &cvt)
+{ // Checks that cvt.in returns partial when the input ends mid code point and/or the output range is too small, stopping at the listed offsets.
+ using namespace std;
+ // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
+ const char in[] = "bш\uAAAA\U0010AAAA";
+ const char32_t exp_literal[] = U"bш\uAAAA\U0010AAAA";
+ CharT exp[array_size (exp_literal)] = {}; // expected output, converted to the facet's CharT
+ std::copy (begin (exp_literal), end (exp_literal), begin (exp));
+
+ static_assert (array_size (in) == 11, ""); // 10 UTF-8 bytes plus the terminating null
+ static_assert (array_size (exp_literal) == 5, "");
+ static_assert (array_size (exp) == 5, "");
+ VERIFY (char_traits<char>::length (in) == 10);
+ VERIFY (char_traits<char32_t>::length (exp_literal) == 4);
+ VERIFY (char_traits<CharT>::length (exp) == 4);
+
+ test_offsets_partial offsets[] = { // {in_size, out_size, expected_in_next, expected_out_next}
+ {1, 0, 0, 0}, // no space for first CP
+
+ {3, 1, 1, 1}, // no space for second CP
+ {2, 2, 1, 1}, // incomplete second CP
+ {2, 1, 1, 1}, // incomplete second CP, and no space for it
+
+ {6, 2, 3, 2}, // no space for third CP
+ {4, 3, 3, 2}, // incomplete third CP
+ {5, 3, 3, 2}, // incomplete third CP
+ {4, 2, 3, 2}, // incomplete third CP, and no space for it
+ {5, 2, 3, 2}, // incomplete third CP, and no space for it
+
+ {10, 3, 6, 3}, // no space for fourth CP
+ {7, 4, 6, 3}, // incomplete fourth CP
+ {8, 4, 6, 3}, // incomplete fourth CP
+ {9, 4, 6, 3}, // incomplete fourth CP
+ {7, 3, 6, 3}, // incomplete fourth CP, and no space for it
+ {8, 3, 6, 3}, // incomplete fourth CP, and no space for it
+ {9, 3, 6, 3}, // incomplete fourth CP, and no space for it
+ };
+
+ for (auto t : offsets)
+ {
+ CharT out[array_size (exp) - 1] = {};
+ VERIFY (t.in_size <= array_size (in)); // sanity checks on the table entry itself
+ VERIFY (t.out_size <= array_size (out));
+ VERIFY (t.expected_in_next <= t.in_size);
+ VERIFY (t.expected_out_next <= t.out_size);
+ auto state = mbstate_t{};
+ auto in_next = (const char *) nullptr;
+ auto out_next = (CharT *) nullptr;
+ auto res = codecvt_base::result ();
+
+ res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size,
+ out_next);
+ VERIFY (res == cvt.partial);
+ VERIFY (in_next == in + t.expected_in_next);
+ VERIFY (out_next == out + t.expected_out_next);
+ VERIFY (char_traits<CharT>::compare (out, exp, t.expected_out_next) == 0);
+ if (t.expected_out_next < array_size (out)) // the element just past the output must still be zero
+ VERIFY (out[t.expected_out_next] == 0);
+ }
+}
+
+template <class CharT>
+void
+utf8_to_utf32_in_error (const std::codecvt<CharT, char, mbstate_t> &cvt)
+{ // Checks that cvt.in returns error, stopping at the listed offsets, when one byte of the valid UTF-8 input is overwritten.
+ using namespace std;
+ // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
+ const char valid_in[] = "bш\uAAAA\U0010AAAA";
+ const char32_t exp_literal[] = U"bш\uAAAA\U0010AAAA";
+ CharT exp[array_size (exp_literal)] = {}; // expected output, converted to the facet's CharT
+ std::copy (begin (exp_literal), end (exp_literal), begin (exp));
+
+ static_assert (array_size (valid_in) == 11, ""); // 10 UTF-8 bytes plus the terminating null
+ static_assert (array_size (exp_literal) == 5, "");
+ static_assert (array_size (exp) == 5, "");
+ VERIFY (char_traits<char>::length (valid_in) == 10);
+ VERIFY (char_traits<char32_t>::length (exp_literal) == 4);
+ VERIFY (char_traits<CharT>::length (exp) == 4);
+
+ test_offsets_error<char> offsets[] = { // {in_size, out_size, expected_in_next, expected_out_next, replace_char, replace_pos}
+
+ // replace leading byte with invalid byte
+ {1, 4, 0, 0, '\xFF', 0},
+ {3, 4, 1, 1, '\xFF', 1},
+ {6, 4, 3, 2, '\xFF', 3},
+ {10, 4, 6, 3, '\xFF', 6},
+
+ // replace first trailing byte with ASCII byte
+ {3, 4, 1, 1, 'z', 2},
+ {6, 4, 3, 2, 'z', 4},
+ {10, 4, 6, 3, 'z', 7},
+
+ // replace first trailing byte with invalid byte
+ {3, 4, 1, 1, '\xFF', 2},
+ {6, 4, 3, 2, '\xFF', 4},
+ {10, 4, 6, 3, '\xFF', 7},
+
+ // replace second trailing byte with ASCII byte
+ {6, 4, 3, 2, 'z', 5},
+ {10, 4, 6, 3, 'z', 8},
+
+ // replace second trailing byte with invalid byte
+ {6, 4, 3, 2, '\xFF', 5},
+ {10, 4, 6, 3, '\xFF', 8},
+
+ // replace third trailing byte
+ {10, 4, 6, 3, 'z', 9},
+ {10, 4, 6, 3, '\xFF', 9},
+
+ // replace first trailing byte with ASCII byte, also incomplete at end
+ {5, 4, 3, 2, 'z', 4},
+ {8, 4, 6, 3, 'z', 7},
+ {9, 4, 6, 3, 'z', 7},
+
+ // replace first trailing byte with invalid byte, also incomplete at end
+ {5, 4, 3, 2, '\xFF', 4},
+ {8, 4, 6, 3, '\xFF', 7},
+ {9, 4, 6, 3, '\xFF', 7},
+
+ // replace second trailing byte with ASCII byte, also incomplete at end
+ {9, 4, 6, 3, 'z', 8},
+
+ // replace second trailing byte with invalid byte, also incomplete at end
+ {9, 4, 6, 3, '\xFF', 8},
+ };
+ for (auto t : offsets)
+ {
+ char in[array_size (valid_in)] = {};
+ CharT out[array_size (exp) - 1] = {};
+ VERIFY (t.in_size <= array_size (in)); // sanity checks on the table entry itself
+ VERIFY (t.out_size <= array_size (out));
+ VERIFY (t.expected_in_next <= t.in_size);
+ VERIFY (t.expected_out_next <= t.out_size);
+ char_traits<char>::copy (in, valid_in, array_size (valid_in)); // start from a fresh copy of the valid input
+ in[t.replace_pos] = t.replace_char; // then corrupt exactly one byte
+
+ auto state = mbstate_t{};
+ auto in_next = (const char *) nullptr;
+ auto out_next = (CharT *) nullptr;
+ auto res = codecvt_base::result ();
+
+ res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size,
+ out_next);
+ VERIFY (res == cvt.error);
+ VERIFY (in_next == in + t.expected_in_next);
+ VERIFY (out_next == out + t.expected_out_next);
+ VERIFY (char_traits<CharT>::compare (out, exp, t.expected_out_next) == 0);
+ if (t.expected_out_next < array_size (out)) // the element just past the output must still be zero
+ VERIFY (out[t.expected_out_next] == 0);
+ }
+}
+
+template <class CharT>
+void
+utf8_to_utf32_in (const std::codecvt<CharT, char, mbstate_t> &cvt)
+{ // Runs the ok, partial and error checks for cvt.in (UTF-8 to UTF-32).
+ utf8_to_utf32_in_ok (cvt);
+ utf8_to_utf32_in_partial (cvt);
+ utf8_to_utf32_in_error (cvt);
+}
+
+template <class CharT>
+void
+utf32_to_utf8_out_ok (const std::codecvt<CharT, char, mbstate_t> &cvt)
+{ // Checks that cvt.out returns ok, and advances both pointers to the range ends, when the output range is exactly large enough.
+ using namespace std;
+ // UTF-32 string whose code points encode to 1, 2, 3 and 4 UTF-8 bytes
+ const char32_t in_literal[] = U"bш\uAAAA\U0010AAAA";
+ const char exp[] = "bш\uAAAA\U0010AAAA";
+ CharT in[array_size (in_literal)] = {}; // input converted to the facet's CharT
+ copy (begin (in_literal), end (in_literal), begin (in));
+
+ static_assert (array_size (in_literal) == 5, "");
+ static_assert (array_size (in) == 5, "");
+ static_assert (array_size (exp) == 11, ""); // 10 UTF-8 bytes plus the terminating null
+ VERIFY (char_traits<char32_t>::length (in_literal) == 4);
+ VERIFY (char_traits<CharT>::length (in) == 4);
+ VERIFY (char_traits<char>::length (exp) == 10);
+
+ const test_offsets_ok offsets[] = {{0, 0}, {1, 1}, {2, 3}, {3, 6}, {4, 10}}; // {input units supplied, UTF-8 bytes expected}
+ for (auto t : offsets)
+ {
+ char out[array_size (exp) - 1] = {};
+ VERIFY (t.in_size <= array_size (in));
+ VERIFY (t.out_size <= array_size (out));
+ auto state = mbstate_t{};
+ auto in_next = (const CharT *) nullptr;
+ auto out_next = (char *) nullptr;
+ auto res = codecvt_base::result ();
+
+ res = cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size,
+ out_next);
+ VERIFY (res == cvt.ok);
+ VERIFY (in_next == in + t.in_size);
+ VERIFY (out_next == out + t.out_size);
+ VERIFY (char_traits<char>::compare (out, exp, t.out_size) == 0);
+ if (t.out_size < array_size (out)) // the byte just past the output must still be zero
+ VERIFY (out[t.out_size] == 0);
+ }
+}
+
+template <class CharT>
+void
+utf32_to_utf8_out_partial (const std::codecvt<CharT, char, mbstate_t> &cvt)
+{ // Checks that cvt.out returns partial, stopping at the listed offsets, when the output range cannot hold the next code point.
+ using namespace std;
+ // UTF-32 string whose code points encode to 1, 2, 3 and 4 UTF-8 bytes
+ const char32_t in_literal[] = U"bш\uAAAA\U0010AAAA";
+ const char exp[] = "bш\uAAAA\U0010AAAA";
+ CharT in[array_size (in_literal)] = {}; // input converted to the facet's CharT
+ copy (begin (in_literal), end (in_literal), begin (in));
+
+ static_assert (array_size (in_literal) == 5, "");
+ static_assert (array_size (in) == 5, "");
+ static_assert (array_size (exp) == 11, ""); // 10 UTF-8 bytes plus the terminating null
+ VERIFY (char_traits<char32_t>::length (in_literal) == 4);
+ VERIFY (char_traits<CharT>::length (in) == 4);
+ VERIFY (char_traits<char>::length (exp) == 10);
+
+ const test_offsets_partial offsets[] = { // {in_size, out_size, expected_in_next, expected_out_next}
+ {1, 0, 0, 0}, // no space for first CP
+
+ {2, 1, 1, 1}, // no space for second CP
+ {2, 2, 1, 1}, // no space for second CP
+
+ {3, 3, 2, 3}, // no space for third CP
+ {3, 4, 2, 3}, // no space for third CP
+ {3, 5, 2, 3}, // no space for third CP
+
+ {4, 6, 3, 6}, // no space for fourth CP
+ {4, 7, 3, 6}, // no space for fourth CP
+ {4, 8, 3, 6}, // no space for fourth CP
+ {4, 9, 3, 6}, // no space for fourth CP
+ };
+ for (auto t : offsets)
+ {
+ char out[array_size (exp) - 1] = {};
+ VERIFY (t.in_size <= array_size (in)); // sanity checks on the table entry itself
+ VERIFY (t.out_size <= array_size (out));
+ VERIFY (t.expected_in_next <= t.in_size);
+ VERIFY (t.expected_out_next <= t.out_size);
+ auto state = mbstate_t{};
+ auto in_next = (const CharT *) nullptr;
+ auto out_next = (char *) nullptr;
+ auto res = codecvt_base::result ();
+
+ res = cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size,
+ out_next);
+ VERIFY (res == cvt.partial);
+ VERIFY (in_next == in + t.expected_in_next);
+ VERIFY (out_next == out + t.expected_out_next);
+ VERIFY (char_traits<char>::compare (out, exp, t.expected_out_next) == 0);
+ if (t.expected_out_next < array_size (out)) // the byte just past the output must still be zero
+ VERIFY (out[t.expected_out_next] == 0);
+ }
+}
+
+template <class CharT>
+void
+utf32_to_utf8_out_error (const std::codecvt<CharT, char, mbstate_t> &cvt)
+{ // Checks that cvt.out returns error, stopping at the listed offsets, when one input element is overwritten with 0x00110000.
+ using namespace std;
+ const char32_t valid_in[] = U"bш\uAAAA\U0010AAAA";
+ const char exp[] = "bш\uAAAA\U0010AAAA";
+
+ static_assert (array_size (valid_in) == 5, "");
+ static_assert (array_size (exp) == 11, ""); // 10 UTF-8 bytes plus the terminating null
+ VERIFY (char_traits<char32_t>::length (valid_in) == 4);
+ VERIFY (char_traits<char>::length (exp) == 10);
+
+ test_offsets_error<CharT> offsets[] = {{4, 10, 0, 0, 0x00110000, 0}, // {in_size, out_size, expected_in_next, expected_out_next, replace_char, replace_pos}
+ {4, 10, 1, 1, 0x00110000, 1},
+ {4, 10, 2, 3, 0x00110000, 2},
+ {4, 10, 3, 6, 0x00110000, 3}};
+
+ for (auto t : offsets)
+ {
+ CharT in[array_size (valid_in)] = {};
+ char out[array_size (exp) - 1] = {};
+ VERIFY (t.in_size <= array_size (in)); // sanity checks on the table entry itself
+ VERIFY (t.out_size <= array_size (out));
+ VERIFY (t.expected_in_next <= t.in_size);
+ VERIFY (t.expected_out_next <= t.out_size);
+ copy (begin (valid_in), end (valid_in), begin (in)); // start from a fresh copy of the valid input
+ in[t.replace_pos] = t.replace_char; // then overwrite exactly one element
+
+ auto state = mbstate_t{};
+ auto in_next = (const CharT *) nullptr;
+ auto out_next = (char *) nullptr;
+ auto res = codecvt_base::result ();
+
+ res = cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size,
+ out_next);
+ VERIFY (res == cvt.error);
+ VERIFY (in_next == in + t.expected_in_next);
+ VERIFY (out_next == out + t.expected_out_next);
+ VERIFY (char_traits<char>::compare (out, exp, t.expected_out_next) == 0);
+ if (t.expected_out_next < array_size (out)) // the byte just past the output must still be zero
+ VERIFY (out[t.expected_out_next] == 0);
+ }
+}
+
+template <class CharT>
+void
+utf32_to_utf8_out (const std::codecvt<CharT, char, mbstate_t> &cvt)
+{ // Runs the ok, partial and error checks for cvt.out (UTF-32 to UTF-8).
+ utf32_to_utf8_out_ok (cvt);
+ utf32_to_utf8_out_partial (cvt);
+ utf32_to_utf8_out_error (cvt);
+}
+
+template <class CharT>
+void
+test_utf8_utf32_codecvts (const std::codecvt<CharT, char, mbstate_t> &cvt)
+{ // Runs the UTF-8/UTF-32 checks in both directions (in, then out) on the given facet.
+ utf8_to_utf32_in (cvt);
+ utf32_to_utf8_out (cvt);
+}
+
+template <class CharT>
+void
+utf8_to_utf16_in_ok (const std::codecvt<CharT, char, mbstate_t> &cvt)
+{ // Checks that cvt.in returns ok, and advances both pointers to the range ends, when the UTF-8 input ends on a code point boundary.
+ using namespace std;
+ // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
+ const char in[] = "bш\uAAAA\U0010AAAA";
+ const char16_t exp_literal[] = u"bш\uAAAA\U0010AAAA";
+ CharT exp[array_size (exp_literal)] = {}; // expected output, converted to the facet's CharT
+ copy (begin (exp_literal), end (exp_literal), begin (exp));
+
+ static_assert (array_size (in) == 11, ""); // 10 UTF-8 bytes plus the terminating null
+ static_assert (array_size (exp_literal) == 6, ""); // 5 UTF-16 units plus the terminating null
+ static_assert (array_size (exp) == 6, "");
+ VERIFY (char_traits<char>::length (in) == 10);
+ VERIFY (char_traits<char16_t>::length (exp_literal) == 5);
+ VERIFY (char_traits<CharT>::length (exp) == 5);
+
+ test_offsets_ok offsets[] = {{0, 0}, {1, 1}, {3, 2}, {6, 3}, {10, 5}}; // {UTF-8 bytes supplied, output units expected}; the last code point takes two units
+ for (auto t : offsets) // pass 1: output range is limited to exactly t.out_size elements
+ {
+ CharT out[array_size (exp) - 1] = {};
+ VERIFY (t.in_size <= array_size (in));
+ VERIFY (t.out_size <= array_size (out));
+ auto state = mbstate_t{};
+ auto in_next = (const char *) nullptr;
+ auto out_next = (CharT *) nullptr;
+ auto res = codecvt_base::result ();
+
+ res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size,
+ out_next);
+ VERIFY (res == cvt.ok);
+ VERIFY (in_next == in + t.in_size);
+ VERIFY (out_next == out + t.out_size);
+ VERIFY (char_traits<CharT>::compare (out, exp, t.out_size) == 0);
+ if (t.out_size < array_size (out)) // the element just past the output must still be zero
+ VERIFY (out[t.out_size] == 0);
+ }
+
+ for (auto t : offsets) // pass 2: same inputs, but the whole (larger) output array is offered
+ {
+ CharT out[array_size (exp)] = {};
+ VERIFY (t.in_size <= array_size (in));
+ VERIFY (t.out_size <= array_size (out));
+ auto state = mbstate_t{};
+ auto in_next = (const char *) nullptr;
+ auto out_next = (CharT *) nullptr;
+ auto res = codecvt_base::result ();
+
+ res
+ = cvt.in (state, in, in + t.in_size, in_next, out, end (out), out_next);
+ VERIFY (res == cvt.ok);
+ VERIFY (in_next == in + t.in_size);
+ VERIFY (out_next == out + t.out_size);
+ VERIFY (char_traits<CharT>::compare (out, exp, t.out_size) == 0);
+ if (t.out_size < array_size (out)) // the element just past the output must still be zero
+ VERIFY (out[t.out_size] == 0);
+ }
+}
+
+template <class CharT>
+void
+utf8_to_utf16_in_partial (const std::codecvt<CharT, char, mbstate_t> &cvt)
+{ // Checks that cvt.in returns partial when the input ends mid code point and/or the output range is too small, stopping at the listed offsets.
+ using namespace std;
+ // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
+ const char in[] = "bш\uAAAA\U0010AAAA";
+ const char16_t exp_literal[] = u"bш\uAAAA\U0010AAAA";
+ CharT exp[array_size (exp_literal)] = {}; // expected output, converted to the facet's CharT
+ copy (begin (exp_literal), end (exp_literal), begin (exp));
+
+ static_assert (array_size (in) == 11, ""); // 10 UTF-8 bytes plus the terminating null
+ static_assert (array_size (exp_literal) == 6, ""); // 5 UTF-16 units plus the terminating null
+ static_assert (array_size (exp) == 6, "");
+ VERIFY (char_traits<char>::length (in) == 10);
+ VERIFY (char_traits<char16_t>::length (exp_literal) == 5);
+ VERIFY (char_traits<CharT>::length (exp) == 5);
+
+ test_offsets_partial offsets[] = { // {in_size, out_size, expected_in_next, expected_out_next}
+ {1, 0, 0, 0}, // no space for first CP
+
+ {3, 1, 1, 1}, // no space for second CP
+ {2, 2, 1, 1}, // incomplete second CP
+ {2, 1, 1, 1}, // incomplete second CP, and no space for it
+
+ {6, 2, 3, 2}, // no space for third CP
+ {4, 3, 3, 2}, // incomplete third CP
+ {5, 3, 3, 2}, // incomplete third CP
+ {4, 2, 3, 2}, // incomplete third CP, and no space for it
+ {5, 2, 3, 2}, // incomplete third CP, and no space for it
+
+ {10, 3, 6, 3}, // no space for fourth CP
+ {10, 4, 6, 3}, // no space for fourth CP
+ {7, 5, 6, 3}, // incomplete fourth CP
+ {8, 5, 6, 3}, // incomplete fourth CP
+ {9, 5, 6, 3}, // incomplete fourth CP
+ {7, 3, 6, 3}, // incomplete fourth CP, and no space for it
+ {8, 3, 6, 3}, // incomplete fourth CP, and no space for it
+ {9, 3, 6, 3}, // incomplete fourth CP, and no space for it
+ {7, 4, 6, 3}, // incomplete fourth CP, and no space for it
+ {8, 4, 6, 3}, // incomplete fourth CP, and no space for it
+ {9, 4, 6, 3}, // incomplete fourth CP, and no space for it
+
+ };
+
+ for (auto t : offsets)
+ {
+ CharT out[array_size (exp) - 1] = {};
+ VERIFY (t.in_size <= array_size (in)); // sanity checks on the table entry itself
+ VERIFY (t.out_size <= array_size (out));
+ VERIFY (t.expected_in_next <= t.in_size);
+ VERIFY (t.expected_out_next <= t.out_size);
+ auto state = mbstate_t{};
+ auto in_next = (const char *) nullptr;
+ auto out_next = (CharT *) nullptr;
+ auto res = codecvt_base::result ();
+
+ res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size,
+ out_next);
+ VERIFY (res == cvt.partial);
+ VERIFY (in_next == in + t.expected_in_next);
+ VERIFY (out_next == out + t.expected_out_next);
+ VERIFY (char_traits<CharT>::compare (out, exp, t.expected_out_next) == 0);
+ if (t.expected_out_next < array_size (out)) // the element just past the output must still be zero
+ VERIFY (out[t.expected_out_next] == 0);
+ }
+}
+
+template <class CharT>
+void
+utf8_to_utf16_in_error (const std::codecvt<CharT, char, mbstate_t> &cvt)
+{ // Checks that cvt.in returns error, stopping at the listed offsets, when one byte of the valid UTF-8 input is overwritten.
+ using namespace std;
+ const char valid_in[] = "bш\uAAAA\U0010AAAA";
+ const char16_t exp_literal[] = u"bш\uAAAA\U0010AAAA";
+ CharT exp[array_size (exp_literal)] = {}; // expected output, converted to the facet's CharT
+ copy (begin (exp_literal), end (exp_literal), begin (exp));
+
+ static_assert (array_size (valid_in) == 11, ""); // 10 UTF-8 bytes plus the terminating null
+ static_assert (array_size (exp_literal) == 6, ""); // 5 UTF-16 units plus the terminating null
+ static_assert (array_size (exp) == 6, "");
+ VERIFY (char_traits<char>::length (valid_in) == 10);
+ VERIFY (char_traits<char16_t>::length (exp_literal) == 5);
+ VERIFY (char_traits<CharT>::length (exp) == 5);
+
+ test_offsets_error<char> offsets[] = { // {in_size, out_size, expected_in_next, expected_out_next, replace_char, replace_pos}
+
+ // replace leading byte with invalid byte
+ {1, 5, 0, 0, '\xFF', 0},
+ {3, 5, 1, 1, '\xFF', 1},
+ {6, 5, 3, 2, '\xFF', 3},
+ {10, 5, 6, 3, '\xFF', 6},
+
+ // replace first trailing byte with ASCII byte
+ {3, 5, 1, 1, 'z', 2},
+ {6, 5, 3, 2, 'z', 4},
+ {10, 5, 6, 3, 'z', 7},
+
+ // replace first trailing byte with invalid byte
+ {3, 5, 1, 1, '\xFF', 2},
+ {6, 5, 3, 2, '\xFF', 4},
+ {10, 5, 6, 3, '\xFF', 7},
+
+ // replace second trailing byte with ASCII byte
+ {6, 5, 3, 2, 'z', 5},
+ {10, 5, 6, 3, 'z', 8},
+
+ // replace second trailing byte with invalid byte
+ {6, 5, 3, 2, '\xFF', 5},
+ {10, 5, 6, 3, '\xFF', 8},
+
+ // replace third trailing byte
+ {10, 5, 6, 3, 'z', 9},
+ {10, 5, 6, 3, '\xFF', 9},
+
+ // replace first trailing byte with ASCII byte, also incomplete at end
+ {5, 5, 3, 2, 'z', 4},
+ {8, 5, 6, 3, 'z', 7},
+ {9, 5, 6, 3, 'z', 7},
+
+ // replace first trailing byte with invalid byte, also incomplete at end
+ {5, 5, 3, 2, '\xFF', 4},
+ {8, 5, 6, 3, '\xFF', 7},
+ {9, 5, 6, 3, '\xFF', 7},
+
+ // replace second trailing byte with ASCII byte, also incomplete at end
+ {9, 5, 6, 3, 'z', 8},
+
+ // replace second trailing byte with invalid byte, also incomplete at end
+ {9, 5, 6, 3, '\xFF', 8},
+ };
+ for (auto t : offsets)
+ {
+ char in[array_size (valid_in)] = {};
+ CharT out[array_size (exp) - 1] = {};
+ VERIFY (t.in_size <= array_size (in)); // sanity checks on the table entry itself
+ VERIFY (t.out_size <= array_size (out));
+ VERIFY (t.expected_in_next <= t.in_size);
+ VERIFY (t.expected_out_next <= t.out_size);
+ char_traits<char>::copy (in, valid_in, array_size (valid_in)); // start from a fresh copy of the valid input
+ in[t.replace_pos] = t.replace_char; // then corrupt exactly one byte
+
+ auto state = mbstate_t{};
+ auto in_next = (const char *) nullptr;
+ auto out_next = (CharT *) nullptr;
+ auto res = codecvt_base::result ();
+
+ res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size,
+ out_next);
+ VERIFY (res == cvt.error);
+ VERIFY (in_next == in + t.expected_in_next);
+ VERIFY (out_next == out + t.expected_out_next);
+ VERIFY (char_traits<CharT>::compare (out, exp, t.expected_out_next) == 0);
+ if (t.expected_out_next < array_size (out)) // the element just past the output must still be zero
+ VERIFY (out[t.expected_out_next] == 0);
+ }
+}
+
+template <class CharT>
+void
+utf8_to_utf16_in (const std::codecvt<CharT, char, mbstate_t> &cvt)
+{ // Runs the ok, partial and error checks for cvt.in (UTF-8 to UTF-16).
+ utf8_to_utf16_in_ok (cvt);
+ utf8_to_utf16_in_partial (cvt);
+ utf8_to_utf16_in_error (cvt);
+}
+
+template <class CharT>
+void
+utf16_to_utf8_out_ok (const std::codecvt<CharT, char, mbstate_t> &cvt)
+{ // Checks that cvt.out returns ok, and advances both pointers to the range ends, when the input ends on a code point boundary and the output fits exactly.
+ using namespace std;
+ // UTF-16 string whose code points encode to 1, 2, 3 and 4 UTF-8 bytes
+ const char16_t in_literal[] = u"bш\uAAAA\U0010AAAA";
+ const char exp[] = "bш\uAAAA\U0010AAAA";
+ CharT in[array_size (in_literal)] = {}; // value-initialized like the sibling tests, then filled from in_literal
+ copy (begin (in_literal), end (in_literal), begin (in));
+
+ static_assert (array_size (in_literal) == 6, ""); // 5 UTF-16 units plus the terminating null
+ static_assert (array_size (exp) == 11, ""); // 10 UTF-8 bytes plus the terminating null
+ static_assert (array_size (in) == 6, "");
+ VERIFY (char_traits<char16_t>::length (in_literal) == 5);
+ VERIFY (char_traits<char>::length (exp) == 10);
+ VERIFY (char_traits<CharT>::length (in) == 5);
+
+ const test_offsets_ok offsets[] = {{0, 0}, {1, 1}, {2, 3}, {3, 6}, {5, 10}}; // {input units supplied, UTF-8 bytes expected}; the last code point takes two input units
+ for (auto t : offsets)
+ {
+ char out[array_size (exp) - 1] = {};
+ VERIFY (t.in_size <= array_size (in));
+ VERIFY (t.out_size <= array_size (out));
+ auto state = mbstate_t{};
+ auto in_next = (const CharT *) nullptr;
+ auto out_next = (char *) nullptr;
+ auto res = codecvt_base::result ();
+
+ res = cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size,
+ out_next);
+ VERIFY (res == cvt.ok);
+ VERIFY (in_next == in + t.in_size);
+ VERIFY (out_next == out + t.out_size);
+ VERIFY (char_traits<char>::compare (out, exp, t.out_size) == 0);
+ if (t.out_size < array_size (out)) // the byte just past the output must still be zero
+ VERIFY (out[t.out_size] == 0);
+ }
+}
+
+template <class CharT>
+void
+utf16_to_utf8_out_partial (const std::codecvt<CharT, char, mbstate_t> &cvt)
+{ // Checks that cvt.out returns partial, stopping at the listed offsets, when the output range is too small and/or the input ends between the two units of the fourth code point.
+ using namespace std;
+ // UTF-16 string whose code points encode to 1, 2, 3 and 4 UTF-8 bytes
+ const char16_t in_literal[] = u"bш\uAAAA\U0010AAAA";
+ const char exp[] = "bш\uAAAA\U0010AAAA";
+ CharT in[array_size (in_literal)] = {}; // value-initialized like the sibling tests, then filled from in_literal
+ copy (begin (in_literal), end (in_literal), begin (in));
+
+ static_assert (array_size (in_literal) == 6, ""); // 5 UTF-16 units plus the terminating null
+ static_assert (array_size (exp) == 11, ""); // 10 UTF-8 bytes plus the terminating null
+ static_assert (array_size (in) == 6, "");
+ VERIFY (char_traits<char16_t>::length (in_literal) == 5);
+ VERIFY (char_traits<char>::length (exp) == 10);
+ VERIFY (char_traits<CharT>::length (in) == 5);
+
+ const test_offsets_partial offsets[] = { // {in_size, out_size, expected_in_next, expected_out_next}
+ {1, 0, 0, 0}, // no space for first CP
+
+ {2, 1, 1, 1}, // no space for second CP
+ {2, 2, 1, 1}, // no space for second CP
+
+ {3, 3, 2, 3}, // no space for third CP
+ {3, 4, 2, 3}, // no space for third CP
+ {3, 5, 2, 3}, // no space for third CP
+
+ {5, 6, 3, 6}, // no space for fourth CP
+ {5, 7, 3, 6}, // no space for fourth CP
+ {5, 8, 3, 6}, // no space for fourth CP
+ {5, 9, 3, 6}, // no space for fourth CP
+
+ {4, 10, 3, 6}, // incomplete fourth CP
+
+ {4, 6, 3, 6}, // incomplete fourth CP, and no space for it
+ {4, 7, 3, 6}, // incomplete fourth CP, and no space for it
+ {4, 8, 3, 6}, // incomplete fourth CP, and no space for it
+ {4, 9, 3, 6}, // incomplete fourth CP, and no space for it
+ };
+ for (auto t : offsets)
+ {
+ char out[array_size (exp) - 1] = {};
+ VERIFY (t.in_size <= array_size (in)); // sanity checks on the table entry itself
+ VERIFY (t.out_size <= array_size (out));
+ VERIFY (t.expected_in_next <= t.in_size);
+ VERIFY (t.expected_out_next <= t.out_size);
+ auto state = mbstate_t{};
+ auto in_next = (const CharT *) nullptr;
+ auto out_next = (char *) nullptr;
+ auto res = codecvt_base::result ();
+
+ res = cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size,
+ out_next);
+ VERIFY (res == cvt.partial);
+ VERIFY (in_next == in + t.expected_in_next);
+ VERIFY (out_next == out + t.expected_out_next);
+ VERIFY (char_traits<char>::compare (out, exp, t.expected_out_next) == 0);
+ if (t.expected_out_next < array_size (out)) // the byte just past the output must still be zero
+ VERIFY (out[t.expected_out_next] == 0);
+ }
+}
+
+template <class CharT>
+void
+utf16_to_utf8_out_error (const std::codecvt<CharT, char, mbstate_t> &cvt)
+{ // Checks that cvt.out returns error, stopping at the listed offsets, when one input unit is overwritten so that the surrogates are misplaced or unpaired.
+ using namespace std;
+ const char16_t valid_in[] = u"bш\uAAAA\U0010AAAA";
+ const char exp[] = "bш\uAAAA\U0010AAAA";
+
+ static_assert (array_size (valid_in) == 6, ""); // 5 UTF-16 units plus the terminating null
+ static_assert (array_size (exp) == 11, ""); // 10 UTF-8 bytes plus the terminating null
+ VERIFY (char_traits<char16_t>::length (valid_in) == 5);
+ VERIFY (char_traits<char>::length (exp) == 10);
+
+ test_offsets_error<CharT> offsets[] = { // {in_size, out_size, expected_in_next, expected_out_next, replace_char, replace_pos}
+ {5, 10, 0, 0, 0xD800, 0},
+ {5, 10, 0, 0, 0xDBFF, 0},
+ {5, 10, 0, 0, 0xDC00, 0},
+ {5, 10, 0, 0, 0xDFFF, 0},
+
+ {5, 10, 1, 1, 0xD800, 1},
+ {5, 10, 1, 1, 0xDBFF, 1},
+ {5, 10, 1, 1, 0xDC00, 1},
+ {5, 10, 1, 1, 0xDFFF, 1},
+
+ {5, 10, 2, 3, 0xD800, 2},
+ {5, 10, 2, 3, 0xDBFF, 2},
+ {5, 10, 2, 3, 0xDC00, 2},
+ {5, 10, 2, 3, 0xDFFF, 2},
+
+ // make the leading surrogate a trailing one
+ {5, 10, 3, 6, 0xDC00, 3},
+ {5, 10, 3, 6, 0xDFFF, 3},
+
+ // make the trailing surrogate a leading one
+ {5, 10, 3, 6, 0xD800, 4},
+ {5, 10, 3, 6, 0xDBFF, 4},
+
+ // make the trailing surrogate a BMP char
+ {5, 10, 3, 6, u'z', 4},
+ };
+
+ for (auto t : offsets)
+ {
+ CharT in[array_size (valid_in)] = {};
+ char out[array_size (exp) - 1] = {};
+ VERIFY (t.in_size <= array_size (in)); // sanity checks on the table entry itself
+ VERIFY (t.out_size <= array_size (out));
+ VERIFY (t.expected_in_next <= t.in_size);
+ VERIFY (t.expected_out_next <= t.out_size);
+ copy (begin (valid_in), end (valid_in), begin (in)); // start from a fresh copy of the valid input
+ in[t.replace_pos] = t.replace_char; // then overwrite exactly one unit
+
+ auto state = mbstate_t{};
+ auto in_next = (const CharT *) nullptr;
+ auto out_next = (char *) nullptr;
+ auto res = codecvt_base::result ();
+
+ res = cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size,
+ out_next);
+ VERIFY (res == cvt.error);
+ VERIFY (in_next == in + t.expected_in_next);
+ VERIFY (out_next == out + t.expected_out_next);
+ VERIFY (char_traits<char>::compare (out, exp, t.expected_out_next) == 0);
+ if (t.expected_out_next < array_size (out)) // the byte just past the output must still be zero
+ VERIFY (out[t.expected_out_next] == 0);
+ }
+}
+
+template <class CharT>
+void
+utf16_to_utf8_out (const std::codecvt<CharT, char, mbstate_t> &cvt)
+{ // Runs the ok, partial and error checks for cvt.out (UTF-16 to UTF-8).
+ utf16_to_utf8_out_ok (cvt);
+ utf16_to_utf8_out_partial (cvt);
+ utf16_to_utf8_out_error (cvt);
+}
+
+template <class CharT>
+void
+test_utf8_utf16_cvts (const std::codecvt<CharT, char, mbstate_t> &cvt)
+{ // Entry point for one facet: checks the `in' direction, then `out'.
+ utf8_to_utf16_in (cvt);
+ utf16_to_utf8_out (cvt);
+}
+
+template <class CharT>
+void
+utf8_to_ucs2_in_ok (const std::codecvt<CharT, char, mbstate_t> &cvt)
+{ // Checks that cvt.in () returns ok when input ends on a code point boundary.
+ using namespace std;
+ // UTF-8 string of 1-byte CP, 2-byte CP and 3-byte CP
+ const char in[] = "bш\uAAAA";
+ const char16_t exp_literal[] = u"bш\uAAAA";
+ CharT exp[array_size (exp_literal)] = {};
+ copy (begin (exp_literal), end (exp_literal), begin (exp)); // element-wise copy into CharT units
+
+ static_assert (array_size (in) == 7, ""); // sizes include the terminating NUL
+ static_assert (array_size (exp_literal) == 4, "");
+ static_assert (array_size (exp) == 4, "");
+ VERIFY (char_traits<char>::length (in) == 6);
+ VERIFY (char_traits<char16_t>::length (exp_literal) == 3);
+ VERIFY (char_traits<CharT>::length (exp) == 3);
+
+ test_offsets_ok offsets[] = {{0, 0}, {1, 1}, {3, 2}, {6, 3}}; // used below as {in_size, out_size}; struct declared outside this view
+ for (auto t : offsets) // pass 1: output range is exactly t.out_size units
+ {
+ CharT out[array_size (exp) - 1] = {}; // zero-filled so unwritten units are detectable
+ VERIFY (t.in_size <= array_size (in));
+ VERIFY (t.out_size <= array_size (out));
+ auto state = mbstate_t{};
+ auto in_next = (const char *) nullptr;
+ auto out_next = (CharT *) nullptr;
+ auto res = codecvt_base::result ();
+
+ res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size,
+ out_next);
+ VERIFY (res == cvt.ok);
+ VERIFY (in_next == in + t.in_size); // all input consumed
+ VERIFY (out_next == out + t.out_size); // exactly the expected output produced
+ VERIFY (char_traits<CharT>::compare (out, exp, t.out_size) == 0);
+ if (t.out_size < array_size (out))
+ VERIFY (out[t.out_size] == 0); // nothing written past out_next
+ }
+
+ for (auto t : offsets) // pass 2: same inputs, output range runs to end (out)
+ {
+ CharT out[array_size (exp)] = {}; // one unit larger than in pass 1
+ VERIFY (t.in_size <= array_size (in));
+ VERIFY (t.out_size <= array_size (out));
+ auto state = mbstate_t{};
+ auto in_next = (const char *) nullptr;
+ auto out_next = (CharT *) nullptr;
+ auto res = codecvt_base::result ();
+
+ res
+ = cvt.in (state, in, in + t.in_size, in_next, out, end (out), out_next);
+ VERIFY (res == cvt.ok);
+ VERIFY (in_next == in + t.in_size);
+ VERIFY (out_next == out + t.out_size);
+ VERIFY (char_traits<CharT>::compare (out, exp, t.out_size) == 0);
+ if (t.out_size < array_size (out))
+ VERIFY (out[t.out_size] == 0); // nothing written past out_next
+ }
+}
+
+template <class CharT>
+void
+utf8_to_ucs2_in_partial (const std::codecvt<CharT, char, mbstate_t> &cvt)
+{ // Checks that cvt.in () returns partial on truncated input or too-small output.
+ using namespace std;
+ // UTF-8 string of 1-byte CP, 2-byte CP and 3-byte CP
+ const char in[] = "bш\uAAAA";
+ const char16_t exp_literal[] = u"bш\uAAAA";
+ CharT exp[array_size (exp_literal)] = {};
+ copy (begin (exp_literal), end (exp_literal), begin (exp)); // element-wise copy into CharT units
+
+ static_assert (array_size (in) == 7, ""); // sizes include the terminating NUL
+ static_assert (array_size (exp_literal) == 4, "");
+ static_assert (array_size (exp) == 4, "");
+ VERIFY (char_traits<char>::length (in) == 6);
+ VERIFY (char_traits<char16_t>::length (exp_literal) == 3);
+ VERIFY (char_traits<CharT>::length (exp) == 3);
+
+ test_offsets_partial offsets[] = { // used below as {in_size, out_size, expected_in_next, expected_out_next}; struct declared outside this view
+ {1, 0, 0, 0}, // no space for first CP
+
+ {3, 1, 1, 1}, // no space for second CP
+ {2, 2, 1, 1}, // incomplete second CP
+ {2, 1, 1, 1}, // incomplete second CP, and no space for it
+
+ {6, 2, 3, 2}, // no space for third CP
+ {4, 3, 3, 2}, // incomplete third CP
+ {5, 3, 3, 2}, // incomplete third CP
+ {4, 2, 3, 2}, // incomplete third CP, and no space for it
+ {5, 2, 3, 2}, // incomplete third CP, and no space for it
+ };
+
+ for (auto t : offsets)
+ {
+ CharT out[array_size (exp) - 1] = {}; // zero-filled so unwritten units are detectable
+ VERIFY (t.in_size <= array_size (in)); // sanity checks on the table entry itself
+ VERIFY (t.out_size <= array_size (out));
+ VERIFY (t.expected_in_next <= t.in_size);
+ VERIFY (t.expected_out_next <= t.out_size);
+ auto state = mbstate_t{};
+ auto in_next = (const char *) nullptr;
+ auto out_next = (CharT *) nullptr;
+ auto res = codecvt_base::result ();
+
+ res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size,
+ out_next);
+ VERIFY (res == cvt.partial);
+ VERIFY (in_next == in + t.expected_in_next); // stops at the last complete, converted CP
+ VERIFY (out_next == out + t.expected_out_next);
+ VERIFY (char_traits<CharT>::compare (out, exp, t.expected_out_next) == 0);
+ if (t.expected_out_next < array_size (out))
+ VERIFY (out[t.expected_out_next] == 0); // nothing written past out_next
+ }
+}
+
+template <class CharT>
+void
+utf8_to_ucs2_in_error (const std::codecvt<CharT, char, mbstate_t> &cvt)
+{ // Checks that cvt.in () returns error on corrupted bytes and on any 4-byte sequence.
+ using namespace std;
+ const char valid_in[] = "bш\uAAAA\U0010AAAA"; // 1 + 2 + 3 + 4 bytes, see the asserts below
+ const char16_t exp_literal[] = u"bш\uAAAA\U0010AAAA";
+ CharT exp[array_size (exp_literal)] = {};
+ copy (begin (exp_literal), end (exp_literal), begin (exp)); // element-wise copy into CharT units
+
+ static_assert (array_size (valid_in) == 11, ""); // sizes include the terminating NUL
+ static_assert (array_size (exp_literal) == 6, "");
+ static_assert (array_size (exp) == 6, "");
+ VERIFY (char_traits<char>::length (valid_in) == 10);
+ VERIFY (char_traits<char16_t>::length (exp_literal) == 5);
+ VERIFY (char_traits<CharT>::length (exp) == 5);
+
+ test_offsets_error<char> offsets[] = { // used below as {in_size, out_size, expected_in_next, expected_out_next, replace_char, replace_pos}; struct declared outside this view
+
+ // replace leading byte with invalid byte
+ {1, 5, 0, 0, '\xFF', 0},
+ {3, 5, 1, 1, '\xFF', 1},
+ {6, 5, 3, 2, '\xFF', 3},
+ {10, 5, 6, 3, '\xFF', 6},
+
+ // replace first trailing byte with ASCII byte
+ {3, 5, 1, 1, 'z', 2},
+ {6, 5, 3, 2, 'z', 4},
+ {10, 5, 6, 3, 'z', 7},
+
+ // replace first trailing byte with invalid byte
+ {3, 5, 1, 1, '\xFF', 2},
+ {6, 5, 3, 2, '\xFF', 4},
+ {10, 5, 6, 3, '\xFF', 7},
+
+ // replace second trailing byte with ASCII byte
+ {6, 5, 3, 2, 'z', 5},
+ {10, 5, 6, 3, 'z', 8},
+
+ // replace second trailing byte with invalid byte
+ {6, 5, 3, 2, '\xFF', 5},
+ {10, 5, 6, 3, '\xFF', 8},
+
+ // replace third trailing byte
+ {10, 5, 6, 3, 'z', 9},
+ {10, 5, 6, 3, '\xFF', 9},
+
+ // When we see a leading byte of 4-byte CP, we should return error, no
+ // matter if it is incomplete at the end or has errors in the trailing
+ // bytes.
+
+ // Don't replace anything, show full 4-byte CP
+ {10, 4, 6, 3, 'b', 0}, // writing 'b' at position 0 is a no-op: valid_in[0] is already 'b'
+ {10, 5, 6, 3, 'b', 0},
+
+ // Don't replace anything, show incomplete 4-byte CP at the end
+ {7, 4, 6, 3, 'b', 0}, // incomplete fourth CP
+ {8, 4, 6, 3, 'b', 0}, // incomplete fourth CP
+ {9, 4, 6, 3, 'b', 0}, // incomplete fourth CP
+ {7, 5, 6, 3, 'b', 0}, // incomplete fourth CP
+ {8, 5, 6, 3, 'b', 0}, // incomplete fourth CP
+ {9, 5, 6, 3, 'b', 0}, // incomplete fourth CP
+
+ // replace first trailing byte with ASCII byte, also incomplete at end
+ {5, 5, 3, 2, 'z', 4},
+
+ // replace first trailing byte with invalid byte, also incomplete at end
+ {5, 5, 3, 2, '\xFF', 4},
+
+ // replace first trailing byte with ASCII byte, also incomplete at end
+ {8, 5, 6, 3, 'z', 7},
+ {9, 5, 6, 3, 'z', 7},
+
+ // replace first trailing byte with invalid byte, also incomplete at end
+ {8, 5, 6, 3, '\xFF', 7},
+ {9, 5, 6, 3, '\xFF', 7},
+
+ // replace second trailing byte with ASCII byte, also incomplete at end
+ {9, 5, 6, 3, 'z', 8},
+
+ // replace second trailing byte with invalid byte, also incomplete at end
+ {9, 5, 6, 3, '\xFF', 8},
+ };
+ for (auto t : offsets)
+ {
+ char in[array_size (valid_in)] = {};
+ CharT out[array_size (exp) - 1] = {}; // zero-filled so unwritten units are detectable
+ VERIFY (t.in_size <= array_size (in)); // sanity checks on the table entry itself
+ VERIFY (t.out_size <= array_size (out));
+ VERIFY (t.expected_in_next <= t.in_size);
+ VERIFY (t.expected_out_next <= t.out_size);
+ char_traits<char>::copy (in, valid_in, array_size (valid_in)); // fresh copy of the valid input for every case
+ in[t.replace_pos] = t.replace_char; // inject the fault (or the no-op rewrite)
+
+ auto state = mbstate_t{};
+ auto in_next = (const char *) nullptr;
+ auto out_next = (CharT *) nullptr;
+ auto res = codecvt_base::result ();
+
+ res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size,
+ out_next);
+ VERIFY (res == cvt.error);
+ VERIFY (in_next == in + t.expected_in_next); // stops at the start of the offending sequence
+ VERIFY (out_next == out + t.expected_out_next);
+ VERIFY (char_traits<CharT>::compare (out, exp, t.expected_out_next) == 0);
+ if (t.expected_out_next < array_size (out))
+ VERIFY (out[t.expected_out_next] == 0); // nothing written past out_next
+ }
+}
+
+template <class CharT>
+void
+utf8_to_ucs2_in (const std::codecvt<CharT, char, mbstate_t> &cvt)
+{ // Runs the ok, partial and error checks of cvt.in () in that order.
+ utf8_to_ucs2_in_ok (cvt);
+ utf8_to_ucs2_in_partial (cvt);
+ utf8_to_ucs2_in_error (cvt);
+}
+
+template <class CharT>
+void
+ucs2_to_utf8_out_ok (const std::codecvt<CharT, char, mbstate_t> &cvt)
+{ // Checks that cvt.out () returns ok when the output range fits whole code points.
+ using namespace std;
+ // Three code points whose UTF-8 encodings take 1, 2 and 3 bytes
+ const char16_t in_literal[] = u"bш\uAAAA";
+ const char exp[] = "bш\uAAAA";
+ CharT in[array_size (in_literal)] = {};
+ copy (begin (in_literal), end (in_literal), begin (in)); // element-wise copy into CharT units
+
+ static_assert (array_size (in_literal) == 4, ""); // sizes include the terminating NUL
+ static_assert (array_size (exp) == 7, "");
+ static_assert (array_size (in) == 4, "");
+ VERIFY (char_traits<char16_t>::length (in_literal) == 3);
+ VERIFY (char_traits<char>::length (exp) == 6);
+ VERIFY (char_traits<CharT>::length (in) == 3);
+
+ const test_offsets_ok offsets[] = {{0, 0}, {1, 1}, {2, 3}, {3, 6}}; // used below as {in_size, out_size}; struct declared outside this view
+ for (auto t : offsets)
+ {
+ char out[array_size (exp) - 1] = {}; // zero-filled so unwritten bytes are detectable
+ VERIFY (t.in_size <= array_size (in));
+ VERIFY (t.out_size <= array_size (out));
+ auto state = mbstate_t{};
+ auto in_next = (const CharT *) nullptr;
+ auto out_next = (char *) nullptr;
+ auto res = codecvt_base::result ();
+
+ res = cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size,
+ out_next);
+ VERIFY (res == cvt.ok);
+ VERIFY (in_next == in + t.in_size); // all input consumed
+ VERIFY (out_next == out + t.out_size); // exactly the expected output produced
+ VERIFY (char_traits<char>::compare (out, exp, t.out_size) == 0);
+ if (t.out_size < array_size (out))
+ VERIFY (out[t.out_size] == 0); // nothing written past out_next
+ }
+}
+
+template <class CharT>
+void
+ucs2_to_utf8_out_partial (const std::codecvt<CharT, char, mbstate_t> &cvt)
+{ // Checks that cvt.out () returns partial when the output range is too small.
+ using namespace std;
+ // Three code points whose UTF-8 encodings take 1, 2 and 3 bytes
+ const char16_t in_literal[] = u"bш\uAAAA";
+ const char exp[] = "bш\uAAAA";
+ CharT in[array_size (in_literal)] = {};
+ copy (begin (in_literal), end (in_literal), begin (in)); // element-wise copy into CharT units
+
+ static_assert (array_size (in_literal) == 4, ""); // sizes include the terminating NUL
+ static_assert (array_size (exp) == 7, "");
+ static_assert (array_size (in) == 4, "");
+ VERIFY (char_traits<char16_t>::length (in_literal) == 3);
+ VERIFY (char_traits<char>::length (exp) == 6);
+ VERIFY (char_traits<CharT>::length (in) == 3);
+
+ const test_offsets_partial offsets[] = { // used below as {in_size, out_size, expected_in_next, expected_out_next}; struct declared outside this view
+ {1, 0, 0, 0}, // no space for first CP
+
+ {2, 1, 1, 1}, // no space for second CP
+ {2, 2, 1, 1}, // no space for second CP
+
+ {3, 3, 2, 3}, // no space for third CP
+ {3, 4, 2, 3}, // no space for third CP
+ {3, 5, 2, 3}, // no space for third CP
+ };
+ for (auto t : offsets)
+ {
+ char out[array_size (exp) - 1] = {}; // zero-filled so unwritten bytes are detectable
+ VERIFY (t.in_size <= array_size (in)); // sanity checks on the table entry itself
+ VERIFY (t.out_size <= array_size (out));
+ VERIFY (t.expected_in_next <= t.in_size);
+ VERIFY (t.expected_out_next <= t.out_size);
+ auto state = mbstate_t{};
+ auto in_next = (const CharT *) nullptr;
+ auto out_next = (char *) nullptr;
+ auto res = codecvt_base::result ();
+
+ res = cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size,
+ out_next);
+ VERIFY (res == cvt.partial);
+ VERIFY (in_next == in + t.expected_in_next); // stops before the CP that did not fit
+ VERIFY (out_next == out + t.expected_out_next);
+ VERIFY (char_traits<char>::compare (out, exp, t.expected_out_next) == 0);
+ if (t.expected_out_next < array_size (out))
+ VERIFY (out[t.expected_out_next] == 0); // no partial multibyte sequence written
+ }
+}
+
+template <class CharT>
+void
+ucs2_to_utf8_out_error (const std::codecvt<CharT, char, mbstate_t> &cvt)
+{ // Checks that cvt.out () returns error for every surrogate unit, paired or not.
+ using namespace std;
+ const char16_t valid_in[] = u"bш\uAAAA\U0010AAAA"; // last CP occupies two char16_t units (a surrogate pair)
+ const char exp[] = "bш\uAAAA\U0010AAAA";
+
+ static_assert (array_size (valid_in) == 6, ""); // sizes include the terminating NUL
+ static_assert (array_size (exp) == 11, "");
+ VERIFY (char_traits<char16_t>::length (valid_in) == 5);
+ VERIFY (char_traits<char>::length (exp) == 10);
+
+ test_offsets_error<CharT> offsets[] = { // used below as {in_size, out_size, expected_in_next, expected_out_next, replace_char, replace_pos}; struct declared outside this view
+ {5, 10, 0, 0, 0xD800, 0},
+ {5, 10, 0, 0, 0xDBFF, 0},
+ {5, 10, 0, 0, 0xDC00, 0},
+ {5, 10, 0, 0, 0xDFFF, 0},
+
+ {5, 10, 1, 1, 0xD800, 1},
+ {5, 10, 1, 1, 0xDBFF, 1},
+ {5, 10, 1, 1, 0xDC00, 1},
+ {5, 10, 1, 1, 0xDFFF, 1},
+
+ {5, 10, 2, 3, 0xD800, 2},
+ {5, 10, 2, 3, 0xDBFF, 2},
+ {5, 10, 2, 3, 0xDC00, 2},
+ {5, 10, 2, 3, 0xDFFF, 2},
+
+ // don't replace anything, just show the surrogate pair
+ {5, 10, 3, 6, u'b', 0}, // writing u'b' at position 0 is a no-op: valid_in[0] is already u'b'
+
+ // make the leading surrogate a trailing one
+ {5, 10, 3, 6, 0xDC00, 3},
+ {5, 10, 3, 6, 0xDFFF, 3},
+
+ // make the trailing surrogate a leading one
+ {5, 10, 3, 6, 0xD800, 4},
+ {5, 10, 3, 6, 0xDBFF, 4},
+
+ // make the trailing surrogate a BMP char
+ {5, 10, 3, 6, u'z', 4},
+
+ {5, 7, 3, 6, u'b', 0}, // no space for fourth CP
+ {5, 8, 3, 6, u'b', 0}, // no space for fourth CP
+ {5, 9, 3, 6, u'b', 0}, // no space for fourth CP
+
+ {4, 10, 3, 6, u'b', 0}, // incomplete fourth CP
+ {4, 7, 3, 6, u'b', 0}, // incomplete fourth CP, and no space for it
+ {4, 8, 3, 6, u'b', 0}, // incomplete fourth CP, and no space for it
+ {4, 9, 3, 6, u'b', 0}, // incomplete fourth CP, and no space for it
+
+ };
+
+ for (auto t : offsets)
+ {
+ CharT in[array_size (valid_in)] = {};
+ char out[array_size (exp) - 1] = {}; // zero-filled so unwritten bytes are detectable
+ VERIFY (t.in_size <= array_size (in)); // sanity checks on the table entry itself
+ VERIFY (t.out_size <= array_size (out));
+ VERIFY (t.expected_in_next <= t.in_size);
+ VERIFY (t.expected_out_next <= t.out_size);
+ copy (begin (valid_in), end (valid_in), begin (in)); // fresh copy of the valid input for every case
+ in[t.replace_pos] = t.replace_char; // inject the fault (or the no-op rewrite)
+
+ auto state = mbstate_t{};
+ auto in_next = (const CharT *) nullptr;
+ auto out_next = (char *) nullptr;
+ auto res = codecvt_base::result ();
+
+ res = cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size,
+ out_next);
+ VERIFY (res == cvt.error); // expected even for the short-output and truncated-pair rows
+ VERIFY (in_next == in + t.expected_in_next); // stops at the offending unit
+ VERIFY (out_next == out + t.expected_out_next);
+ VERIFY (char_traits<char>::compare (out, exp, t.expected_out_next) == 0);
+ if (t.expected_out_next < array_size (out))
+ VERIFY (out[t.expected_out_next] == 0); // nothing written past out_next
+ }
+}
+
+template <class CharT>
+void
+ucs2_to_utf8_out (const std::codecvt<CharT, char, mbstate_t> &cvt)
+{ // Runs the ok, partial and error checks of cvt.out () in that order.
+ ucs2_to_utf8_out_ok (cvt);
+ ucs2_to_utf8_out_partial (cvt);
+ ucs2_to_utf8_out_error (cvt);
+}
+
+template <class CharT>
+void
+test_utf8_ucs2_cvts (const std::codecvt<CharT, char, mbstate_t> &cvt)
+{ // Entry point for one facet: checks the `in' direction, then `out'.
+ utf8_to_ucs2_in (cvt);
+ ucs2_to_utf8_out (cvt);
+}
diff --git a/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_unicode_wchar_t.cc b/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_unicode_wchar_t.cc
new file mode 100644
index 0000000..1695049
--- /dev/null
+++ b/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_unicode_wchar_t.cc
@@ -0,0 +1,59 @@
+// Copyright (C) 2020-2023 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3. If not see
+// <http://www.gnu.org/licenses/>.
+
+// { dg-do run { target c++11 } }
+
+#include "codecvt_unicode.h"
+
+#include <codecvt>
+
+using namespace std;
+
+void
+test_utf8_utf32_codecvts ()
+{ // Runs the UTF-8/UTF-32 suite on codecvt_utf8<wchar_t>; body is empty unless wchar_t is 4 bytes.
+#if __SIZEOF_WCHAR_T__ == 4
+ auto cvt_ptr = to_unique_ptr (new codecvt_utf8<wchar_t> ()); // to_unique_ptr is not defined in this view; presumably from codecvt_unicode.h
+ test_utf8_utf32_codecvts (*cvt_ptr); // one-argument overload, not this function; presumably from codecvt_unicode.h
+#endif
+}
+
+void
+test_utf8_utf16_codecvts ()
+{ // Runs the UTF-8/UTF-16 suite on codecvt_utf8_utf16<wchar_t>; needs wchar_t of at least 2 bytes.
+#if __SIZEOF_WCHAR_T__ >= 2
+ auto cvt_ptr = to_unique_ptr (new codecvt_utf8_utf16<wchar_t> ()); // to_unique_ptr is not defined in this view; presumably from codecvt_unicode.h
+ test_utf8_utf16_cvts (*cvt_ptr);
+#endif
+}
+
+void
+test_utf8_ucs2_codecvts ()
+{ // Runs the UTF-8/UCS-2 suite on codecvt_utf8<wchar_t>; body is empty unless wchar_t is 2 bytes.
+#if __SIZEOF_WCHAR_T__ == 2
+ auto cvt_ptr = to_unique_ptr (new codecvt_utf8<wchar_t> ()); // to_unique_ptr is not defined in this view; presumably from codecvt_unicode.h
+ test_utf8_ucs2_cvts (*cvt_ptr);
+#endif
+}
+
+int
+main ()
+{ // Each call below compiles to an empty body when its wchar_t size condition is not met.
+ test_utf8_utf32_codecvts ();
+ test_utf8_utf16_codecvts ();
+ test_utf8_ucs2_codecvts ();
+}