aboutsummaryrefslogtreecommitdiff
path: root/libstdc++-v3/src
diff options
context:
space:
mode:
authorJonathan Wakely <jwakely@redhat.com>2015-01-16 23:38:35 +0000
committerJonathan Wakely <redi@gcc.gnu.org>2015-01-16 23:38:35 +0000
commit28af1fb39dfbf903ccafeafda927d280fd8768d8 (patch)
tree8c798f7195e89e40829e60d3b8f2b3e02730a5ea /libstdc++-v3/src
parent0f59f5c12003e547e7557cc09da39c2abd63403d (diff)
downloadgcc-28af1fb39dfbf903ccafeafda927d280fd8768d8.zip
gcc-28af1fb39dfbf903ccafeafda927d280fd8768d8.tar.gz
gcc-28af1fb39dfbf903ccafeafda927d280fd8768d8.tar.bz2
Implement C++11 <codecvt> header.
	* config/abi/pre/gnu.ver: Export new symbols.
	* include/Makefile.am: Add codecvt.
	* include/Makefile.in: Regenerate.
	* include/std/codecvt: New header.
	* src/c++11/codecvt.cc (__codecvt_utf8_base, __codecvt_utf16_base,
	__codecvt_utf8_utf16_base): Define specializations.
	* testsuite/22_locale/codecvt/codecvt_utf8/requirements/1.cc: New.
	* testsuite/22_locale/codecvt/codecvt_utf16/requirements/1.cc: New.
	* testsuite/22_locale/codecvt/codecvt_utf8_utf16/requirements/1.cc: New.

From-SVN: r219779
Diffstat (limited to 'libstdc++-v3/src')
-rw-r--r--libstdc++-v3/src/c++11/codecvt.cc1029
1 files changed, 988 insertions, 41 deletions
diff --git a/libstdc++-v3/src/c++11/codecvt.cc b/libstdc++-v3/src/c++11/codecvt.cc
index fdd4972..7eed903 100644
--- a/libstdc++-v3/src/c++11/codecvt.cc
+++ b/libstdc++-v3/src/c++11/codecvt.cc
@@ -22,10 +22,9 @@
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
// <http://www.gnu.org/licenses/>.
-#include <bits/locale_classes.h>
-#include <bits/codecvt.h>
-#include <bits/stl_algobase.h> // std::max
+#include <codecvt>
#include <cstring> // std::memcpy, std::memcmp
+#include <bits/stl_algobase.h> // std::max
#ifdef _GLIBCXX_USE_C99_STDINT_TR1
namespace std _GLIBCXX_VISIBILITY(default)
@@ -51,6 +50,88 @@ namespace
size_t size() const { return end - next; }
};
+  // Multibyte sequences can have a "header" consisting of a Byte Order Mark.
+  // Each array holds the BOM exactly as it appears in the byte stream, and is
+  // sized to the number of BOM bytes so that sizeof() and array-reference
+  // templates (write_bom/read_bom) see the true length.
+  const unsigned char utf8_bom[3] = { 0xEF, 0xBB, 0xBF };
+  const unsigned char utf16_bom[2] = { 0xFE, 0xFF };   // big-endian
+  const unsigned char utf16le_bom[2] = { 0xFF, 0xFE }; // little-endian
+
+  // Copy the N-byte BOM to the output and advance to.next past it.
+  // Returns false, writing nothing, when fewer than N chars remain.
+  template<size_t N>
+    inline bool
+    write_bom(range<char>& to, const unsigned char (&bom)[N])
+    {
+      const bool fits = !(to.size() < N);
+      if (fits)
+        {
+          memcpy(to.next, bom, N);
+          to.next += N;
+        }
+      return fits;
+    }
+
+  // Write the UTF-8 BOM when generate_header is set in mode.
+  // Returns false only when a BOM was requested but did not fit.
+  bool
+  write_utf8_bom(range<char>& to, codecvt_mode mode)
+  {
+    if (!(mode & generate_header))
+      return true;  // no header requested, nothing to do
+    return write_bom(to, utf8_bom);
+  }
+
+  // Write a UTF-16 BOM when generate_header is set in mode; little_endian in
+  // mode selects which of the two byte sequences is written.
+  // Returns false only when a BOM was requested but the output is full.
+  bool
+  write_utf16_bom(range<char16_t>& to, codecvt_mode mode)
+  {
+    if (!(mode & generate_header))
+      return true;
+    if (to.size() == 0)
+      return false;
+    // The BOM is stored as raw bytes, so it is copied bytewise into the
+    // single 16-bit output unit it occupies.
+    const unsigned char* const bytes
+      = (mode & little_endian) ? utf16le_bom : utf16_bom;
+    std::memcpy(to.next, bytes, 2);
+    to.next += 1;
+    return true;
+  }
+
+  // If the input starts with the N-byte BOM, step from.next over it and
+  // return true; otherwise leave the range untouched and return false.
+  template<size_t N>
+    inline bool
+    read_bom(range<const char>& from, const unsigned char (&bom)[N])
+    {
+      if (from.size() < N)
+        return false;
+      if (memcmp(from.next, bom, N) != 0)
+        return false;
+      from.next += N;
+      return true;
+    }
+
+  // When consume_header is set in mode, skip a UTF-8 BOM at the start of the
+  // input if one is present.
+  void
+  read_utf8_bom(range<const char>& from, codecvt_mode mode)
+  {
+    if (!(mode & consume_header))
+      return;
+    read_bom(from, utf8_bom);  // result unused: a missing BOM is not an error
+  }
+
+  // If consume_header is set in mode update from.next to after any BOM.
+  // Return little_endian iff the UTF-16LE BOM was present.
+  //
+  // The BOM is matched against its byte sequence, not against a char16_t
+  // value: *from.next is loaded in host byte order, so comparing it with
+  // 0xFEFF / 0xFFFE gives the opposite answer on a little-endian host.
+  codecvt_mode
+  read_utf16_bom(range<const char16_t>& from, codecvt_mode mode)
+  {
+    if ((mode & consume_header) && from.size())
+      {
+        if (!std::memcmp(from.next, utf16_bom, 2))
+          ++from.next;
+        else if (!std::memcmp(from.next, utf16le_bom, 2))
+          {
+            ++from.next;
+            return little_endian;
+          }
+      }
+    return {};
+  }
+
+ // Read a codepoint from a UTF-8 multibyte sequence.
+ // Updates from.next if the codepoint is not greater than maxcode.
+ // Returns -1 if there is an invalid or incomplete multibyte character.
char32_t
read_utf8_code_point(range<const char>& from, unsigned long maxcode)
{
@@ -74,9 +155,8 @@ namespace
if ((c2 & 0xC0) != 0x80)
return -1;
char32_t c = (c1 << 6) + c2 - 0x3080;
- if (c > maxcode)
- return -1;
- from.next += 2;
+ if (c <= maxcode)
+ from.next += 2;
return c;
}
else if (c1 < 0xF0) // 3-byte sequence
@@ -92,9 +172,8 @@ namespace
if ((c3 & 0xC0) != 0x80)
return -1;
char32_t c = (c1 << 12) + (c2 << 6) + c3 - 0xE2080;
- if (c > maxcode)
- return -1;
- from.next += 3;
+ if (c <= maxcode)
+ from.next += 3;
return c;
}
else if (c1 < 0xF5) // 4-byte sequence
@@ -115,9 +194,8 @@ namespace
if ((c4 & 0xC0) != 0x80)
return -1;
char32_t c = (c1 << 18) + (c2 << 12) + (c3 << 6) + c4 - 0x3C82080;
- if (c > maxcode)
- return -1;
- from.next += 4;
+ if (c <= maxcode)
+ from.next += 4;
return c;
}
else // > U+10FFFF
@@ -162,9 +240,48 @@ namespace
return true;
}
+ inline char16_t // convert one 16-bit code unit between host byte order and the order named by mode
+ adjust_byte_order(char16_t c, codecvt_mode mode)
+ {
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ return (mode & little_endian) ? __builtin_bswap16(c) : c; // big-endian host: swap only when mode asks for little-endian
+#else
+ return (mode & little_endian) ? c : __builtin_bswap16(c); // otherwise: swap only when mode does not ask for little-endian
+#endif
+ }
+
+  // Read a codepoint from a UTF-16 multibyte sequence.
+  // The sequence's endianness is indicated by (mode & little_endian).
+  // Updates from.next if the codepoint is not greater than maxcode.
+  // Returns -1 if there is an incomplete multibyte character; an empty
+  // input range is reported the same way.
+  char32_t
+  read_utf16_code_point(range<const char16_t>& from, unsigned long maxcode,
+                        codecvt_mode mode)
+  {
+    // Some callers loop on a character count rather than on from.size(),
+    // so an exhausted range must not be dereferenced.
+    if (from.size() == 0)
+      return -1;
+    int inc = 1;
+    char32_t c = adjust_byte_order(from.next[0], mode);
+    if (c >= 0xD800 && c <= 0xDBFF)
+      {
+        // Lead surrogate: a second code unit is needed to form the pair.
+        if (from.size() < 2)
+          return -1;
+        const char16_t c2 = adjust_byte_order(from.next[1], mode);
+        if (c2 >= 0xDC00 && c2 <= 0xDFFF)
+          {
+            // Combine lead and trail surrogates into one code point.
+            c = (c << 10) + c2 - 0x35FDC00;
+            inc = 2;
+          }
+      }
+    if (c <= maxcode)
+      from.next += inc;
+    return c;
+  }
+
+ template<typename C>
bool
- write_utf16_code_point(range<char16_t>& to, char32_t codepoint)
+ write_utf16_code_point(range<C>& to, char32_t codepoint, codecvt_mode mode)
{
+ static_assert(sizeof(C) >= 2, "a code unit must be at least 16-bit");
+
if (codepoint < max_single_utf16_unit)
{
if (to.size() > 0)
@@ -183,8 +300,8 @@ namespace
char16_t trail = 0xDC00 + (codepoint & 0x3FF);
char32_t utf16bytes = (lead << 10) + trail + SURROGATE_OFFSET;
- to.next[0] = utf16bytes >> 16;
- to.next[1] = utf16bytes & 0xFFFF;
+ to.next[0] = adjust_byte_order(utf16bytes >> 16, mode);
+ to.next[1] = adjust_byte_order(utf16bytes & 0xFFFF, mode);
to.next += 2;
return true;
}
@@ -194,12 +311,15 @@ namespace
// utf8 -> ucs4
codecvt_base::result
ucs4_in(range<const char>& from, range<char32_t>& to,
- unsigned long maxcode = max_code_point)
+ unsigned long maxcode = max_code_point, codecvt_mode mode = {})
{
+ read_utf8_bom(from, mode);
while (from.size() && to.size())
{
const char32_t codepoint = read_utf8_code_point(from, maxcode);
- if (codepoint == char32_t(-1) || codepoint > maxcode)
+ if (codepoint == char32_t(-1))
+ break;
+ if (codepoint > maxcode)
return codecvt_base::error;
*to.next++ = codepoint;
}
@@ -209,8 +329,10 @@ namespace
// ucs4 -> utf8
codecvt_base::result
ucs4_out(range<const char32_t>& from, range<char>& to,
- unsigned long maxcode = max_code_point)
+ unsigned long maxcode = max_code_point, codecvt_mode mode = {})
{
+ if (!write_utf8_bom(to, mode))
+ return codecvt_base::partial;
while (from.size())
{
const char32_t c = from.next[0];
@@ -223,20 +345,62 @@ namespace
return codecvt_base::ok;
}
+  // utf16 -> ucs4
+  // Skips a BOM when consume_header is set, then converts until either range
+  // is exhausted. Returns error for a code point above maxcode, partial if
+  // input is left unconverted, ok otherwise.
+  codecvt_base::result
+  ucs4_in(range<const char16_t>& from, range<char32_t>& to,
+          unsigned long maxcode = max_code_point, codecvt_mode mode = {})
+  {
+    // A UTF-16LE BOM selects little-endian input: add the flag with OR.
+    // Masking with AND would leave mode without little_endian whenever the
+    // caller had not already set it, i.e. exactly when the BOM matters.
+    if (read_utf16_bom(from, mode) == little_endian)
+      mode = codecvt_mode(mode | little_endian);
+    while (from.size() && to.size())
+      {
+        const char32_t codepoint = read_utf16_code_point(from, maxcode, mode);
+        if (codepoint == char32_t(-1))
+          break;  // incomplete sequence: reported as partial below
+        if (codepoint > maxcode)
+          return codecvt_base::error;
+        *to.next++ = codepoint;
+      }
+    return from.size() ? codecvt_base::partial : codecvt_base::ok;
+  }
+
+  // ucs4 -> utf16
+  // Writes a BOM first when mode requests one. Returns partial when the
+  // output runs out, error for a code point above maxcode, ok otherwise.
+  codecvt_base::result
+  ucs4_out(range<const char32_t>& from, range<char16_t>& to,
+           unsigned long maxcode = max_code_point, codecvt_mode mode = {})
+  {
+    if (!write_utf16_bom(to, mode))
+      return codecvt_base::partial;
+    for (; from.size(); ++from.next)
+      {
+        const char32_t codepoint = *from.next;
+        if (codepoint > maxcode)
+          return codecvt_base::error;
+        // from.next is only advanced once the code point has been written.
+        if (!write_utf16_code_point(to, codepoint, mode))
+          return codecvt_base::partial;
+      }
+    return codecvt_base::ok;
+  }
+
// utf8 -> utf16
+ template<typename C>
codecvt_base::result
- utf16_in(range<const char>& from, range<char16_t>& to,
- unsigned long maxcode = max_code_point)
+ utf16_in(range<const char>& from, range<C>& to,
+ unsigned long maxcode = max_code_point, codecvt_mode mode = {})
{
+ read_utf8_bom(from, mode);
while (from.size() && to.size())
{
const char* first = from.next;
if ((unsigned char)*first >= 0xF0 && to.size() < 2)
return codecvt_base::partial;
const char32_t codepoint = read_utf8_code_point(from, maxcode);
- if (codepoint == char32_t(-1) || codepoint > maxcode)
+ if (codepoint == char32_t(-1))
+ return codecvt_base::partial;
+ if (codepoint > maxcode)
return codecvt_base::error;
- if (!write_utf16_code_point(to, codepoint))
+ if (!write_utf16_code_point(to, codepoint, {}))
{
from.next = first;
return codecvt_base::partial;
@@ -246,15 +410,18 @@ namespace
}
// utf16 -> utf8
+ template<typename C>
codecvt_base::result
- utf16_out(range<const char16_t>& from, range<char>& to,
- unsigned long maxcode = max_code_point)
+ utf16_out(range<const C>& from, range<char>& to,
+ unsigned long maxcode = max_code_point, codecvt_mode mode = {})
{
+ if (!write_utf8_bom(to, mode))
+ return codecvt_base::partial;
while (from.size())
{
char32_t c = from.next[0];
int inc = 1;
- if (c >= 0xD800 && c < 0xDBFF) // start of surrogate pair
+ if (c >= 0xD800 && c <= 0xDBFF) // start of surrogate pair
{
if (from.size() < 2)
return codecvt_base::ok; // stop converting at this point
@@ -278,11 +445,12 @@ namespace
}
// return pos such that [begin,pos) is valid UTF-16 string no longer than max
- int
- utf16_len(const char* begin, const char* end, size_t max,
- char32_t maxcode = max_code_point)
+ const char*
+ utf16_span(const char* begin, const char* end, size_t max,
+ char32_t maxcode = max_code_point, codecvt_mode mode = {})
{
range<const char> from{ begin, end };
+ read_utf8_bom(from, mode);
size_t count = 0;
while (count+1 < max)
{
@@ -295,24 +463,117 @@ namespace
}
if (count+1 == max) // take one more character if it fits in a single unit
read_utf8_code_point(from, std::max(max_single_utf16_unit, maxcode));
- return from.next - begin;
+ return from.next;
}
- // return pos such that [begin,pos) is valid UCS-4 string no longer than max
- int
- ucs4_len(const char* begin, const char* end, size_t max,
- char32_t maxcode = max_code_point)
+ // utf8 -> ucs2
+ // UCS-2 cannot represent code points above U+FFFF, so the caller's limit is
+ // capped with std::min; std::max would raise a smaller limit and let
+ // characters that need a surrogate pair through.
+ codecvt_base::result
+ ucs2_in(range<const char>& from, range<char16_t>& to,
+ char32_t maxcode = max_code_point, codecvt_mode mode = {})
{
- range<const char> from{ begin, end };
- size_t count = 0;
- while (count < max)
+ return utf16_in(from, to, std::min(max_single_utf16_unit, maxcode), mode);
+ }
+
+  // ucs2 -> utf8
+  // UCS-2 has no code points above U+FFFF, so the caller's limit is capped
+  // with std::min; std::max would instead raise a smaller limit to U+FFFF
+  // and leave the default limit above the UCS-2 range.
+  codecvt_base::result
+  ucs2_out(range<const char16_t>& from, range<char>& to,
+           char32_t maxcode = max_code_point, codecvt_mode mode = {})
+  {
+    return utf16_out(from, to, std::min(max_single_utf16_unit, maxcode), mode);
+  }
+
+ // ucs2 -> utf16: copies 16-bit units to the output in the byte order selected by mode
+ codecvt_base::result
+ ucs2_out(range<const char16_t>& from, range<char16_t>& to,
+ char32_t maxcode = max_code_point, codecvt_mode mode = {})
+ {
+ if (!write_utf16_bom(to, mode)) // a requested BOM did not fit in the output
+ return codecvt_base::partial;
+ while (from.size() && to.size())
{
- char32_t c = read_utf8_code_point(from, maxcode);
+ char16_t c = from.next[0];
+ if (c >= 0xD800 && c <= 0xDBFF) // start of surrogate pair
+ return codecvt_base::error;
+ if (c > maxcode)
+ return codecvt_base::error;
+ *to.next++ = adjust_byte_order(c, mode);
+ ++from.next;
+ }
+ return from.size() == 0 ? codecvt_base::ok : codecvt_base::partial; // partial: the output filled before the input was used up
+ }
+
+ // utf16 -> ucs2
+ // Skips a BOM when consume_header is set, then copies code points that fit
+ // the limit. Returns error for a code point above the limit, partial if
+ // input is left unconverted, ok otherwise.
+ codecvt_base::result
+ ucs2_in(range<const char16_t>& from, range<char16_t>& to,
+ char32_t maxcode = max_code_point, codecvt_mode mode = {})
+ {
+ // A UTF-16LE BOM selects little-endian input: add the flag with OR.
+ // AND would leave little_endian unset whenever the caller had not set it.
+ if (read_utf16_bom(from, mode) == little_endian)
+ mode = codecvt_mode(mode | little_endian);
+ // UCS-2 has no surrogate pairs, so cap the limit at max_single_utf16_unit.
+ maxcode = std::min(max_single_utf16_unit, maxcode);
+ while (from.size() && to.size())
+ {
+ const char32_t c = read_utf16_code_point(from, maxcode, mode);
if (c == char32_t(-1))
break;
- ++count;
+ // maxcode itself is permitted; only larger code points are errors.
+ if (c > maxcode)
+ return codecvt_base::error;
+ *to.next++ = c;
}
- return from.next - begin;
+ return from.size() == 0 ? codecvt_base::ok : codecvt_base::partial;
+ }
+
+  // Return pos such that [begin,pos) holds at most max UCS-2 characters read
+  // from UTF-16 input, after skipping a BOM when consume_header is set.
+  const char16_t*
+  ucs2_span(const char16_t* begin, const char16_t* end, size_t max,
+            char32_t maxcode, codecvt_mode mode)
+  {
+    range<const char16_t> from{ begin, end };
+    // A UTF-16LE BOM selects little-endian input: add the flag with OR
+    // (AND would clear it when the caller had not set it).
+    if (read_utf16_bom(from, mode) == little_endian)
+      mode = codecvt_mode(mode | little_endian);
+    // UCS-2 has no surrogate pairs, so cap the limit rather than raise it.
+    maxcode = std::min(max_single_utf16_unit, maxcode);
+    char32_t c = 0;
+    // Stop at the end of the input as well as at max characters, so that
+    // nothing at or beyond end is read.
+    while (max-- && from.size() && c <= maxcode)
+      c = read_utf16_code_point(from, maxcode, mode);
+    return from.next;
+  }
+
+  // Return pos such that [begin,pos) holds at most max UCS-2 characters read
+  // from UTF-8 input, after skipping a BOM when consume_header is set.
+  const char*
+  ucs2_span(const char* begin, const char* end, size_t max,
+            char32_t maxcode, codecvt_mode mode)
+  {
+    range<const char> from{ begin, end };
+    read_utf8_bom(from, mode);
+    // UCS-2 cannot hold code points above U+FFFF, so cap the limit with
+    // std::min; std::max would raise a smaller limit instead.
+    maxcode = std::min(max_single_utf16_unit, maxcode);
+    char32_t c = 0;
+    // The loop ends after max characters or at the first result above
+    // maxcode, which includes the -1 that signals a failed read.
+    while (max-- && c <= maxcode)
+      c = read_utf8_code_point(from, maxcode);
+    return from.next;
+  }
+
+  // Return pos such that [begin,pos) is a valid UCS-4 string of at most max
+  // characters, read from UTF-8 input. A leading BOM is skipped when
+  // consume_header is set in mode.
+  const char*
+  ucs4_span(const char* begin, const char* end, size_t max,
+            char32_t maxcode = max_code_point, codecvt_mode mode = {})
+  {
+    range<const char> from{ begin, end };
+    read_utf8_bom(from, mode);
+    // Stops after max characters or at the first result above maxcode.
+    for (char32_t last = 0; max-- && last <= maxcode; )
+      last = read_utf8_code_point(from, maxcode);
+    return from.next;
+  }
+
+ // return pos such that [begin,pos) is valid UCS-4 string no longer than max
+ // (input is UTF-16; a leading BOM is skipped when consume_header is set)
+ const char16_t*
+ ucs4_span(const char16_t* begin, const char16_t* end, size_t max,
+ char32_t maxcode = max_code_point, codecvt_mode mode = {})
+ {
+ range<const char16_t> from{ begin, end };
+ // A UTF-16LE BOM selects little-endian input: add the flag with OR
+ // (AND would clear it when the caller had not set it).
+ if (read_utf16_bom(from, mode) == little_endian)
+ mode = codecvt_mode(mode | little_endian);
+ char32_t c = 0;
+ // Also stop at the end of the input so nothing at or beyond end is read.
+ while (max-- && from.size() && c <= maxcode)
+ c = read_utf16_code_point(from, maxcode, mode);
+ return from.next;
}
}
@@ -376,7 +637,8 @@ codecvt<char16_t, char, mbstate_t>::
do_length(state_type&, const extern_type* __from,
const extern_type* __end, size_t __max) const
{
- return utf16_len(__from, __end, __max);
+ __end = utf16_span(__from, __end, __max);
+ return __end - __from;
}
int
@@ -446,13 +708,698 @@ codecvt<char32_t, char, mbstate_t>::
do_length(state_type&, const extern_type* __from,
const extern_type* __end, size_t __max) const
{
- return ucs4_len(__from, __end, __max);
+ __end = ucs4_span(__from, __end, __max);
+ return __end - __from;
}
int
codecvt<char32_t, char, mbstate_t>::do_max_length() const throw()
{ return 4; }
+// Define members of codecvt_utf8<char16_t> base class implementation.
+// Converts from UTF-8 to UCS-2. Each member forwards to the file-local ucs2_* helpers with this facet's _M_maxcode and _M_mode.
+
+__codecvt_utf8_base<char16_t>::~__codecvt_utf8_base() { }
+
+codecvt_base::result
+__codecvt_utf8_base<char16_t>::
+do_out(state_type&, const intern_type* __from, const intern_type* __from_end, // UCS-2 -> UTF-8; the state argument is unused
+ const intern_type*& __from_next,
+ extern_type* __to, extern_type* __to_end,
+ extern_type*& __to_next) const
+{
+ range<const char16_t> from{ __from, __from_end };
+ range<char> to{ __to, __to_end };
+ auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
+ __from_next = from.next; // report how far the input was consumed
+ __to_next = to.next; // report how far the output was filled
+ return res;
+}
+
+codecvt_base::result
+__codecvt_utf8_base<char16_t>::
+do_unshift(state_type&, extern_type* __to, extern_type*, // writes nothing
+ extern_type*& __to_next) const
+{
+ __to_next = __to;
+ return noconv;
+}
+
+codecvt_base::result
+__codecvt_utf8_base<char16_t>::
+do_in(state_type&, const extern_type* __from, const extern_type* __from_end, // UTF-8 -> UCS-2; the state argument is unused
+ const extern_type*& __from_next,
+ intern_type* __to, intern_type* __to_end,
+ intern_type*& __to_next) const
+{
+ range<const char> from{ __from, __from_end };
+ range<char16_t> to{ __to, __to_end };
+ auto res = ucs2_in(from, to, _M_maxcode, _M_mode);
+ __from_next = from.next; // report how far the input was consumed
+ __to_next = to.next; // report how far the output was filled
+ return res;
+}
+
+int
+__codecvt_utf8_base<char16_t>::do_encoding() const throw()
+{ return 0; } // 0 is the codecvt convention for a variable-width external encoding
+
+bool
+__codecvt_utf8_base<char16_t>::do_always_noconv() const throw()
+{ return false; }
+
+int
+__codecvt_utf8_base<char16_t>::
+do_length(state_type&, const extern_type* __from,
+ const extern_type* __end, size_t __max) const
+{
+ __end = ucs2_span(__from, __end, __max, _M_maxcode, _M_mode); // __end is reused to hold the end of the convertible prefix
+ return __end - __from;
+}
+
+int
+__codecvt_utf8_base<char16_t>::do_max_length() const throw()
+{ return 3; } // presumably the longest UTF-8 sequence for a code point that fits in 16 bits
+
+// Define members of codecvt_utf8<char32_t> base class implementation.
+// Converts from UTF-8 to UTF-32 (aka UCS-4). Each member forwards to the file-local ucs4_* helpers with this facet's _M_maxcode and _M_mode.
+
+__codecvt_utf8_base<char32_t>::~__codecvt_utf8_base() { }
+
+codecvt_base::result
+__codecvt_utf8_base<char32_t>::
+do_out(state_type&, const intern_type* __from, const intern_type* __from_end, // UCS-4 -> UTF-8; the state argument is unused
+ const intern_type*& __from_next,
+ extern_type* __to, extern_type* __to_end,
+ extern_type*& __to_next) const
+{
+ range<const char32_t> from{ __from, __from_end };
+ range<char> to{ __to, __to_end };
+ auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
+ __from_next = from.next; // report how far the input was consumed
+ __to_next = to.next; // report how far the output was filled
+ return res;
+}
+
+codecvt_base::result
+__codecvt_utf8_base<char32_t>::
+do_unshift(state_type&, extern_type* __to, extern_type*, // writes nothing
+ extern_type*& __to_next) const
+{
+ __to_next = __to;
+ return noconv;
+}
+
+codecvt_base::result
+__codecvt_utf8_base<char32_t>::
+do_in(state_type&, const extern_type* __from, const extern_type* __from_end, // UTF-8 -> UCS-4; the state argument is unused
+ const extern_type*& __from_next,
+ intern_type* __to, intern_type* __to_end,
+ intern_type*& __to_next) const
+{
+ range<const char> from{ __from, __from_end };
+ range<char32_t> to{ __to, __to_end };
+ auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
+ __from_next = from.next; // report how far the input was consumed
+ __to_next = to.next; // report how far the output was filled
+ return res;
+}
+
+int
+__codecvt_utf8_base<char32_t>::do_encoding() const throw()
+{ return 0; } // 0 is the codecvt convention for a variable-width external encoding
+
+bool
+__codecvt_utf8_base<char32_t>::do_always_noconv() const throw()
+{ return false; }
+
+int
+__codecvt_utf8_base<char32_t>::
+do_length(state_type&, const extern_type* __from,
+ const extern_type* __end, size_t __max) const
+{
+ __end = ucs4_span(__from, __end, __max, _M_maxcode, _M_mode); // __end is reused to hold the end of the convertible prefix
+ return __end - __from;
+}
+
+int
+__codecvt_utf8_base<char32_t>::do_max_length() const throw()
+{ return 4; } // presumably the longest UTF-8 sequence for a single code point
+
+#ifdef _GLIBCXX_USE_WCHAR_T
+// Define members of codecvt_utf8<wchar_t> base class implementation.
+// Converts from UTF-8 to UCS-2 or UCS-4 depending on sizeof(wchar_t). The wchar_t buffers are viewed as char16_t or char32_t so the ucs2_*/ucs4_* helpers can be reused.
+
+__codecvt_utf8_base<wchar_t>::~__codecvt_utf8_base() { }
+
+codecvt_base::result
+__codecvt_utf8_base<wchar_t>::
+do_out(state_type&, const intern_type* __from, const intern_type* __from_end, // wide -> UTF-8; the state argument is unused
+ const intern_type*& __from_next,
+ extern_type* __to, extern_type* __to_end,
+ extern_type*& __to_next) const
+{
+ range<char> to{ __to, __to_end };
+#if __SIZEOF_WCHAR_T__ == 2
+ range<const char16_t> from{
+ reinterpret_cast<const char16_t*>(__from),
+ reinterpret_cast<const char16_t*>(__from_end)
+ };
+ auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
+#elif __SIZEOF_WCHAR_T__ == 4
+ range<const char32_t> from{
+ reinterpret_cast<const char32_t*>(__from),
+ reinterpret_cast<const char32_t*>(__from_end)
+ };
+ auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
+#else
+ return codecvt_base::error; // no helper exists for other wchar_t sizes
+#endif
+ __from_next = reinterpret_cast<const wchar_t*>(from.next); // convert the helper's position back to a wchar_t pointer
+ __to_next = to.next;
+ return res;
+}
+
+codecvt_base::result
+__codecvt_utf8_base<wchar_t>::
+do_unshift(state_type&, extern_type* __to, extern_type*, // writes nothing
+ extern_type*& __to_next) const
+{
+ __to_next = __to;
+ return noconv;
+}
+
+codecvt_base::result
+__codecvt_utf8_base<wchar_t>::
+do_in(state_type&, const extern_type* __from, const extern_type* __from_end, // UTF-8 -> wide; the state argument is unused
+ const extern_type*& __from_next,
+ intern_type* __to, intern_type* __to_end,
+ intern_type*& __to_next) const
+{
+ range<const char> from{ __from, __from_end };
+#if __SIZEOF_WCHAR_T__ == 2
+ range<char16_t> to{
+ reinterpret_cast<char16_t*>(__to),
+ reinterpret_cast<char16_t*>(__to_end)
+ };
+ auto res = ucs2_in(from, to, _M_maxcode, _M_mode);
+#elif __SIZEOF_WCHAR_T__ == 4
+ range<char32_t> to{
+ reinterpret_cast<char32_t*>(__to),
+ reinterpret_cast<char32_t*>(__to_end)
+ };
+ auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
+#else
+ return codecvt_base::error; // no helper exists for other wchar_t sizes
+#endif
+ __from_next = from.next;
+ __to_next = reinterpret_cast<wchar_t*>(to.next); // convert the helper's position back to a wchar_t pointer
+ return res;
+}
+
+int
+__codecvt_utf8_base<wchar_t>::do_encoding() const throw()
+{ return 0; } // 0 is the codecvt convention for a variable-width external encoding
+
+bool
+__codecvt_utf8_base<wchar_t>::do_always_noconv() const throw()
+{ return false; }
+
+int
+__codecvt_utf8_base<wchar_t>::
+do_length(state_type&, const extern_type* __from,
+ const extern_type* __end, size_t __max) const
+{
+#if __SIZEOF_WCHAR_T__ == 2
+ __end = ucs2_span(__from, __end, __max, _M_maxcode, _M_mode);
+#elif __SIZEOF_WCHAR_T__ == 4
+ __end = ucs4_span(__from, __end, __max, _M_maxcode, _M_mode);
+#else
+ __end = __from; // unsupported wchar_t size: the reported length is 0
+#endif
+ return __end - __from;
+}
+
+int
+__codecvt_utf8_base<wchar_t>::do_max_length() const throw()
+{ return 4; } // same value for both wchar_t sizes; NOTE(review): a 2-byte wchar_t presumably needs at most 3
+#endif
+
+// Define members of codecvt_utf16<char16_t> base class implementation.
+// Converts from UTF-16 to UCS-2. The external char buffers are viewed as char16_t code units before calling the ucs2_* helpers.
+
+__codecvt_utf16_base<char16_t>::~__codecvt_utf16_base() { }
+
+codecvt_base::result
+__codecvt_utf16_base<char16_t>::
+do_out(state_type&, const intern_type* __from, const intern_type* __from_end, // UCS-2 -> UTF-16; the state argument is unused
+ const intern_type*& __from_next,
+ extern_type* __to, extern_type* __to_end,
+ extern_type*& __to_next) const
+{
+ range<const char16_t> from{ __from, __from_end };
+ range<char16_t> to{
+ reinterpret_cast<char16_t*>(__to),
+ reinterpret_cast<char16_t*>(__to_end)
+ };
+ auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
+ __from_next = from.next;
+ __to_next = reinterpret_cast<char*>(to.next); // convert the helper's position back to a char pointer
+ return res;
+}
+
+codecvt_base::result
+__codecvt_utf16_base<char16_t>::
+do_unshift(state_type&, extern_type* __to, extern_type*, // writes nothing
+ extern_type*& __to_next) const
+{
+ __to_next = __to;
+ return noconv;
+}
+
+codecvt_base::result
+__codecvt_utf16_base<char16_t>::
+do_in(state_type&, const extern_type* __from, const extern_type* __from_end, // UTF-16 -> UCS-2; the state argument is unused
+ const extern_type*& __from_next,
+ intern_type* __to, intern_type* __to_end,
+ intern_type*& __to_next) const
+{
+ range<const char16_t> from{
+ reinterpret_cast<const char16_t*>(__from),
+ reinterpret_cast<const char16_t*>(__from_end)
+ };
+ range<char16_t> to{ __to, __to_end };
+ auto res = ucs2_in(from, to, _M_maxcode, _M_mode);
+ __from_next = reinterpret_cast<const char*>(from.next); // convert the helper's position back to a char pointer
+ __to_next = to.next;
+ return res;
+}
+
+// Report the external encoding as variable-width (0). A value of 1 would
+// claim one external char per character, but each UTF-16 code unit occupies
+// two chars and the sequence may start with a BOM.
+int
+__codecvt_utf16_base<char16_t>::do_encoding() const throw()
+{ return 0; }
+
+bool
+__codecvt_utf16_base<char16_t>::do_always_noconv() const throw()
+{ return false; }
+
+int
+__codecvt_utf16_base<char16_t>::
+do_length(state_type&, const extern_type* __from, // result is a count of external chars, not of 16-bit units
+ const extern_type* __end, size_t __max) const
+{
+ auto next = reinterpret_cast<const char16_t*>(__from);
+ next = ucs2_span(next, reinterpret_cast<const char16_t*>(__end), __max,
+ _M_maxcode, _M_mode);
+ return reinterpret_cast<const char*>(next) - __from; // the cast back to char* makes the difference a char count
+}
+
+// Maximum number of external chars consumed to produce one UCS-2 character:
+// one 16-bit code unit (2 chars), plus the 2-char BOM that do_length skips
+// when consume_header is set.
+int
+__codecvt_utf16_base<char16_t>::do_max_length() const throw()
+{
+  int max = 2;
+  if (_M_mode & consume_header)
+    max += 2;
+  return max;
+}
+
+// Define members of codecvt_utf16<char32_t> base class implementation.
+// Converts from UTF-16 to UTF-32 (aka UCS-4). The external char buffers are viewed as char16_t code units before calling the ucs4_* helpers.
+
+__codecvt_utf16_base<char32_t>::~__codecvt_utf16_base() { }
+
+codecvt_base::result
+__codecvt_utf16_base<char32_t>::
+do_out(state_type&, const intern_type* __from, const intern_type* __from_end, // UCS-4 -> UTF-16; the state argument is unused
+ const intern_type*& __from_next,
+ extern_type* __to, extern_type* __to_end,
+ extern_type*& __to_next) const
+{
+ range<const char32_t> from{ __from, __from_end };
+ range<char16_t> to{
+ reinterpret_cast<char16_t*>(__to),
+ reinterpret_cast<char16_t*>(__to_end)
+ };
+ auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
+ __from_next = from.next;
+ __to_next = reinterpret_cast<char*>(to.next); // convert the helper's position back to a char pointer
+ return res;
+}
+
+codecvt_base::result
+__codecvt_utf16_base<char32_t>::
+do_unshift(state_type&, extern_type* __to, extern_type*, // writes nothing
+ extern_type*& __to_next) const
+{
+ __to_next = __to;
+ return noconv;
+}
+
+codecvt_base::result
+__codecvt_utf16_base<char32_t>::
+do_in(state_type&, const extern_type* __from, const extern_type* __from_end, // UTF-16 -> UCS-4; the state argument is unused
+ const extern_type*& __from_next,
+ intern_type* __to, intern_type* __to_end,
+ intern_type*& __to_next) const
+{
+ range<const char16_t> from{
+ reinterpret_cast<const char16_t*>(__from),
+ reinterpret_cast<const char16_t*>(__from_end)
+ };
+ range<char32_t> to{ __to, __to_end };
+ auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
+ __from_next = reinterpret_cast<const char*>(from.next); // convert the helper's position back to a char pointer
+ __to_next = to.next;
+ return res;
+}
+
+int
+__codecvt_utf16_base<char32_t>::do_encoding() const throw()
+{ return 0; } // 0 is the codecvt convention for a variable-width external encoding
+
+bool
+__codecvt_utf16_base<char32_t>::do_always_noconv() const throw()
+{ return false; }
+
+int
+__codecvt_utf16_base<char32_t>::
+do_length(state_type&, const extern_type* __from, // result is a count of external chars, not of 16-bit units
+ const extern_type* __end, size_t __max) const
+{
+ auto next = reinterpret_cast<const char16_t*>(__from);
+ next = ucs4_span(next, reinterpret_cast<const char16_t*>(__end), __max,
+ _M_maxcode, _M_mode);
+ return reinterpret_cast<const char*>(next) - __from; // the cast back to char* makes the difference a char count
+}
+
+// Maximum number of external chars consumed to produce one UCS-4 character:
+// a surrogate pair is two 16-bit code units (4 chars), plus the 2-char BOM
+// that do_length skips when consume_header is set. The previous value of 3
+// was smaller than a single surrogate pair.
+int
+__codecvt_utf16_base<char32_t>::do_max_length() const throw()
+{
+  int max = 4;
+  if (_M_mode & consume_header)
+    max += 2;
+  return max;
+}
+
+#ifdef _GLIBCXX_USE_WCHAR_T
+// Define members of codecvt_utf16<wchar_t> base class implementation.
+// Converts from UTF-16 to UCS-2 or UCS-4 depending on sizeof(wchar_t).
+// The external char sequence holds UTF-16 code units, so every member views
+// it as char16_t (as the char16_t and char32_t specializations do) before
+// calling the file-local conversion helpers.
+
+__codecvt_utf16_base<wchar_t>::~__codecvt_utf16_base() { }
+
+// Convert wide characters in [__from, __from_end) to UTF-16 code units
+// stored in [__to, __to_end). On return __from_next and __to_next mark how
+// far each range was advanced. The state argument is unused.
+codecvt_base::result
+__codecvt_utf16_base<wchar_t>::
+do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
+       const intern_type*& __from_next,
+       extern_type* __to, extern_type* __to_end,
+       extern_type*& __to_next) const
+{
+  // The output is UTF-16, not UTF-8: use a char16_t view of the buffer so
+  // that the UTF-16 overloads of ucs2_out/ucs4_out are selected.
+  range<char16_t> to{
+    reinterpret_cast<char16_t*>(__to),
+    reinterpret_cast<char16_t*>(__to_end)
+  };
+#if __SIZEOF_WCHAR_T__ == 2
+  range<const char16_t> from{
+    reinterpret_cast<const char16_t*>(__from),
+    reinterpret_cast<const char16_t*>(__from_end)
+  };
+  auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
+#elif __SIZEOF_WCHAR_T__ == 4
+  range<const char32_t> from{
+    reinterpret_cast<const char32_t*>(__from),
+    reinterpret_cast<const char32_t*>(__from_end)
+  };
+  auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
+#else
+  return codecvt_base::error;
+#endif
+  __from_next = reinterpret_cast<const wchar_t*>(from.next);
+  __to_next = reinterpret_cast<char*>(to.next);
+  return res;
+}
+
+// Nothing is ever pending, so no characters are written.
+codecvt_base::result
+__codecvt_utf16_base<wchar_t>::
+do_unshift(state_type&, extern_type* __to, extern_type*,
+           extern_type*& __to_next) const
+{
+  __to_next = __to;
+  return noconv;
+}
+
+// Convert UTF-16 code units stored in [__from, __from_end) to wide
+// characters in [__to, __to_end). On return __from_next and __to_next mark
+// how far each range was advanced. The state argument is unused.
+codecvt_base::result
+__codecvt_utf16_base<wchar_t>::
+do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
+      const extern_type*& __from_next,
+      intern_type* __to, intern_type* __to_end,
+      intern_type*& __to_next) const
+{
+  // The input is UTF-16, not UTF-8: use a char16_t view of the buffer so
+  // that the UTF-16 overloads of ucs2_in/ucs4_in are selected.
+  range<const char16_t> from{
+    reinterpret_cast<const char16_t*>(__from),
+    reinterpret_cast<const char16_t*>(__from_end)
+  };
+#if __SIZEOF_WCHAR_T__ == 2
+  range<char16_t> to{
+    reinterpret_cast<char16_t*>(__to),
+    reinterpret_cast<char16_t*>(__to_end)
+  };
+  auto res = ucs2_in(from, to, _M_maxcode, _M_mode);
+#elif __SIZEOF_WCHAR_T__ == 4
+  range<char32_t> to{
+    reinterpret_cast<char32_t*>(__to),
+    reinterpret_cast<char32_t*>(__to_end)
+  };
+  auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
+#else
+  return codecvt_base::error;
+#endif
+  __from_next = reinterpret_cast<const char*>(from.next);
+  __to_next = reinterpret_cast<wchar_t*>(to.next);
+  return res;
+}
+
+// 0 is the codecvt convention for a variable-width external encoding.
+int
+__codecvt_utf16_base<wchar_t>::do_encoding() const throw()
+{ return 0; }
+
+bool
+__codecvt_utf16_base<wchar_t>::do_always_noconv() const throw()
+{ return false; }
+
+// Return the number of external chars that convert to at most __max wide
+// characters. With an unsupported wchar_t size the result is 0.
+int
+__codecvt_utf16_base<wchar_t>::
+do_length(state_type&, const extern_type* __from,
+          const extern_type* __end, size_t __max) const
+{
+  auto next = reinterpret_cast<const char16_t*>(__from);
+#if __SIZEOF_WCHAR_T__ == 2
+  next = ucs2_span(next, reinterpret_cast<const char16_t*>(__end), __max,
+                   _M_maxcode, _M_mode);
+#elif __SIZEOF_WCHAR_T__ == 4
+  next = ucs4_span(next, reinterpret_cast<const char16_t*>(__end), __max,
+                   _M_maxcode, _M_mode);
+#endif
+  return reinterpret_cast<const char*>(next) - __from;
+}
+
+int
+__codecvt_utf16_base<wchar_t>::do_max_length() const throw()
+{ return 4; }
+#endif
+
+// Define members of codecvt_utf8_utf16<char16_t> base class implementation.
+// Converts from UTF-8 to UTF-16. Each member forwards to the file-local utf16_* helpers with this facet's _M_maxcode and _M_mode.
+
+__codecvt_utf8_utf16_base<char16_t>::~__codecvt_utf8_utf16_base() { }
+
+codecvt_base::result
+__codecvt_utf8_utf16_base<char16_t>::
+do_out(state_type&, const intern_type* __from, const intern_type* __from_end, // UTF-16 -> UTF-8; the state argument is unused
+ const intern_type*& __from_next,
+ extern_type* __to, extern_type* __to_end,
+ extern_type*& __to_next) const
+{
+ range<const char16_t> from{ __from, __from_end };
+ range<char> to{ __to, __to_end };
+ auto res = utf16_out(from, to, _M_maxcode, _M_mode);
+ __from_next = from.next; // report how far the input was consumed
+ __to_next = to.next; // report how far the output was filled
+ return res;
+}
+
+codecvt_base::result
+__codecvt_utf8_utf16_base<char16_t>::
+do_unshift(state_type&, extern_type* __to, extern_type*, // writes nothing
+ extern_type*& __to_next) const
+{
+ __to_next = __to;
+ return noconv;
+}
+
+codecvt_base::result
+__codecvt_utf8_utf16_base<char16_t>::
+do_in(state_type&, const extern_type* __from, const extern_type* __from_end, // UTF-8 -> UTF-16; the state argument is unused
+ const extern_type*& __from_next,
+ intern_type* __to, intern_type* __to_end,
+ intern_type*& __to_next) const
+{
+ range<const char> from{ __from, __from_end };
+ range<char16_t> to{ __to, __to_end };
+ auto res = utf16_in(from, to, _M_maxcode, _M_mode);
+ __from_next = from.next; // report how far the input was consumed
+ __to_next = to.next; // report how far the output was filled
+ return res;
+}
+
+int
+__codecvt_utf8_utf16_base<char16_t>::do_encoding() const throw()
+{ return 0; } // 0 is the codecvt convention for a variable-width external encoding
+
+bool
+__codecvt_utf8_utf16_base<char16_t>::do_always_noconv() const throw()
+{ return false; }
+
+int
+__codecvt_utf8_utf16_base<char16_t>::
+do_length(state_type&, const extern_type* __from,
+ const extern_type* __end, size_t __max) const
+{
+ __end = utf16_span(__from, __end, __max, _M_maxcode, _M_mode); // __end is reused to hold the end of the convertible prefix
+ return __end - __from;
+}
+
+int
+__codecvt_utf8_utf16_base<char16_t>::do_max_length() const throw()
+{
+ // Any valid UTF-8 sequence of 3 bytes fits in a single 16-bit code unit,
+ // whereas 4 byte sequences require two 16-bit code units.
+ return 3;
+}
+
+// Define members of codecvt_utf8_utf16<char32_t> base class implementation.
+// Converts from UTF-8 to UTF-16. The utf16_* helpers are instantiated with char32_t, so presumably each char32_t element holds one UTF-16 code unit.
+
+__codecvt_utf8_utf16_base<char32_t>::~__codecvt_utf8_utf16_base() { }
+
+codecvt_base::result
+__codecvt_utf8_utf16_base<char32_t>::
+do_out(state_type&, const intern_type* __from, const intern_type* __from_end, // UTF-16 -> UTF-8; the state argument is unused
+ const intern_type*& __from_next,
+ extern_type* __to, extern_type* __to_end,
+ extern_type*& __to_next) const
+{
+ range<const char32_t> from{ __from, __from_end };
+ range<char> to{ __to, __to_end };
+ auto res = utf16_out(from, to, _M_maxcode, _M_mode);
+ __from_next = from.next; // report how far the input was consumed
+ __to_next = to.next; // report how far the output was filled
+ return res;
+}
+
+codecvt_base::result
+__codecvt_utf8_utf16_base<char32_t>::
+do_unshift(state_type&, extern_type* __to, extern_type*, // writes nothing
+ extern_type*& __to_next) const
+{
+ __to_next = __to;
+ return noconv;
+}
+
+codecvt_base::result
+__codecvt_utf8_utf16_base<char32_t>::
+do_in(state_type&, const extern_type* __from, const extern_type* __from_end, // UTF-8 -> UTF-16; the state argument is unused
+ const extern_type*& __from_next,
+ intern_type* __to, intern_type* __to_end,
+ intern_type*& __to_next) const
+{
+ range<const char> from{ __from, __from_end };
+ range<char32_t> to{ __to, __to_end };
+ auto res = utf16_in(from, to, _M_maxcode, _M_mode);
+ __from_next = from.next; // report how far the input was consumed
+ __to_next = to.next; // report how far the output was filled
+ return res;
+}
+
+int
+__codecvt_utf8_utf16_base<char32_t>::do_encoding() const throw()
+{ return 0; } // 0 is the codecvt convention for a variable-width external encoding
+
+bool
+__codecvt_utf8_utf16_base<char32_t>::do_always_noconv() const throw()
+{ return false; }
+
+int
+__codecvt_utf8_utf16_base<char32_t>::
+do_length(state_type&, const extern_type* __from,
+ const extern_type* __end, size_t __max) const
+{
+ __end = utf16_span(__from, __end, __max, _M_maxcode, _M_mode); // __end is reused to hold the end of the convertible prefix
+ return __end - __from;
+}
+
+int
+__codecvt_utf8_utf16_base<char32_t>::do_max_length() const throw()
+{
+ // Any valid UTF-8 sequence of 3 bytes fits in a single 16-bit code unit,
+ // whereas 4 byte sequences require two 16-bit code units.
+ return 3;
+}
+
+#ifdef _GLIBCXX_USE_WCHAR_T
+// Define members of codecvt_utf8_utf16<wchar_t> base class implementation.
+// Converts from UTF-8 to UTF-16. The utf16_* helpers are instantiated with wchar_t, so presumably each wchar_t element holds one UTF-16 code unit.
+
+__codecvt_utf8_utf16_base<wchar_t>::~__codecvt_utf8_utf16_base() { }
+
+codecvt_base::result
+__codecvt_utf8_utf16_base<wchar_t>::
+do_out(state_type&, const intern_type* __from, const intern_type* __from_end, // UTF-16 -> UTF-8; the state argument is unused
+ const intern_type*& __from_next,
+ extern_type* __to, extern_type* __to_end,
+ extern_type*& __to_next) const
+{
+ range<const wchar_t> from{ __from, __from_end };
+ range<char> to{ __to, __to_end };
+ auto res = utf16_out(from, to, _M_maxcode, _M_mode);
+ __from_next = from.next; // report how far the input was consumed
+ __to_next = to.next; // report how far the output was filled
+ return res;
+}
+
+codecvt_base::result
+__codecvt_utf8_utf16_base<wchar_t>::
+do_unshift(state_type&, extern_type* __to, extern_type*, // writes nothing
+ extern_type*& __to_next) const
+{
+ __to_next = __to;
+ return noconv;
+}
+
+codecvt_base::result
+__codecvt_utf8_utf16_base<wchar_t>::
+do_in(state_type&, const extern_type* __from, const extern_type* __from_end, // UTF-8 -> UTF-16; the state argument is unused
+ const extern_type*& __from_next,
+ intern_type* __to, intern_type* __to_end,
+ intern_type*& __to_next) const
+{
+ range<const char> from{ __from, __from_end };
+ range<wchar_t> to{ __to, __to_end };
+ auto res = utf16_in(from, to, _M_maxcode, _M_mode);
+ __from_next = from.next; // report how far the input was consumed
+ __to_next = to.next; // report how far the output was filled
+ return res;
+}
+
+int
+__codecvt_utf8_utf16_base<wchar_t>::do_encoding() const throw()
+{ return 0; } // 0 is the codecvt convention for a variable-width external encoding
+
+bool
+__codecvt_utf8_utf16_base<wchar_t>::do_always_noconv() const throw()
+{ return false; }
+
+int
+__codecvt_utf8_utf16_base<wchar_t>::
+do_length(state_type&, const extern_type* __from,
+ const extern_type* __end, size_t __max) const
+{
+ __end = utf16_span(__from, __end, __max, _M_maxcode, _M_mode); // __end is reused to hold the end of the convertible prefix
+ return __end - __from;
+}
+
+int
+__codecvt_utf8_utf16_base<wchar_t>::do_max_length() const throw()
+{
+ // Any valid UTF-8 sequence of 3 bytes fits in a single 16-bit code unit,
+ // whereas 4 byte sequences require two 16-bit code units.
+ return 3;
+}
+#endif
+
inline template class __codecvt_abstract_base<char16_t, char, mbstate_t>;
inline template class __codecvt_abstract_base<char32_t, char, mbstate_t>;