Implement C++11 <codecvt> header.

* config/abi/pre/gnu.ver: Export new symbols. * include/Makefile.am: Add codecvt. * include/Makefile.in: Regenerate. * include/std/codecvt: New header. * src/c++11/codecvt.cc (__codecvt_utf8_base, __codecvt_utf16_base, __codecvt_utf8_utf16_base): Define specializations. * testsuite/22_locale/codecvt/codecvt_utf8/requirements/1.cc: New. * testsuite/22_locale/codecvt/codecvt_utf16/requirements/1.cc: New. * testsuite/22_locale/codecvt/codecvt_utf8_utf16/requirements/1.cc: New. From-SVN: r219779
author: Jonathan Wakely <jwakely@redhat.com> 2015-01-16 23:38:35 +0000
committer: Jonathan Wakely <redi@gcc.gnu.org> 2015-01-16 23:38:35 +0000
commit: 28af1fb39dfbf903ccafeafda927d280fd8768d8 (patch)
tree: 8c798f7195e89e40829e60d3b8f2b3e02730a5ea /libstdc++-v3/src
parent: 0f59f5c12003e547e7557cc09da39c2abd63403d (diff)
download: gcc-28af1fb39dfbf903ccafeafda927d280fd8768d8.zip
gcc-28af1fb39dfbf903ccafeafda927d280fd8768d8.tar.gz
gcc-28af1fb39dfbf903ccafeafda927d280fd8768d8.tar.bz2
1 files changed, 988 insertions, 41 deletions
diff --git a/libstdc++-v3/src/c++11/codecvt.cc b/libstdc++-v3/src/c++11/codecvt.cc
index fdd4972..7eed903 100644
--- a/libstdc++-v3/src/c++11/codecvt.cc
+++ b/libstdc++-v3/src/c++11/codecvt.cc
@@ -22,10 +22,9 @@
 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 // <http://www.gnu.org/licenses/>.
 
-#include <bits/locale_classes.h>
-#include <bits/codecvt.h>
-#include <bits/stl_algobase.h>	// std::max
+#include <codecvt>
 #include <cstring>		// std::memcpy, std::memcmp
+#include <bits/stl_algobase.h>	// std::max
 
 #ifdef _GLIBCXX_USE_C99_STDINT_TR1
 namespace std _GLIBCXX_VISIBILITY(default)
@@ -51,6 +50,88 @@ namespace
       size_t size() const { return end - next; }
     };
 
+  // Multibyte sequences can have "header" consisting of Byte Order Mark
+  const unsigned char utf8_bom[3] = { 0xEF, 0xBB, 0xBF };
+  const unsigned char utf16_bom[4] = { 0xFE, 0xFF };
+  const unsigned char utf16le_bom[4] = { 0xFF, 0xFE };
+
+  template<size_t N>
+    inline bool
+    write_bom(range<char>& to, const unsigned char (&bom)[N])
+    {
+      if (to.size() < N)
+	return false;
+      memcpy(to.next, bom, N);
+      to.next += N;
+      return true;
+    }
+
+  // If generate_header is set in mode write out UTF-8 BOM.
+  bool
+  write_utf8_bom(range<char>& to, codecvt_mode mode)
+  {
+    if (mode & generate_header)
+      return write_bom(to, utf8_bom);
+    return true;
+  }
+
+  // If generate_header is set in mode write out the UTF-16 BOM indicated
+  // by whether little_endian is set in mode.
+  bool
+  write_utf16_bom(range<char16_t>& to, codecvt_mode mode)
+  {
+    if (mode & generate_header)
+    {
+      if (!to.size())
+	return false;
+      auto* bom = (mode & little_endian) ? utf16le_bom : utf16_bom;
+      std::memcpy(to.next, bom, 2);
+      ++to.next;
+    }
+    return true;
+  }
+
+  template<size_t N>
+    inline bool
+    read_bom(range<const char>& from, const unsigned char (&bom)[N])
+    {
+      if (from.size() >= N && !memcmp(from.next, bom, N))
+	{
+	  from.next += N;
+	  return true;
+	}
+      return false;
+    }
+
+  // If consume_header is set in mode update from.next to after any BOM.
+  void
+  read_utf8_bom(range<const char>& from, codecvt_mode mode)
+  {
+    if (mode & consume_header)
+      read_bom(from, utf8_bom);
+  }
+
+  // If consume_header is set in mode update from.next to after any BOM.
+  // Return little_endian iff the UTF-16LE BOM was present.
+  codecvt_mode
+  read_utf16_bom(range<const char16_t>& from, codecvt_mode mode)
+  {
+    if (mode & consume_header && from.size())
+      {
+	if (*from.next == 0xFEFF)
+	  ++from.next;
+	else if (*from.next == 0xFFFE)
+	  {
+	    ++from.next;
+	    return little_endian;
+	  }
+      }
+    return {};
+  }
+
+  // Read a codepoint from a UTF-8 multibyte sequence.
+  // Updates from.next if the codepoint is not greater than maxcode.
+  // Returns -1 if there is an invalid or incomplete multibyte character.
   char32_t
   read_utf8_code_point(range<const char>& from, unsigned long maxcode)
   {
@@ -74,9 +155,8 @@ namespace
       if ((c2 & 0xC0) != 0x80)
 	return -1;
       char32_t c = (c1 << 6) + c2 - 0x3080;
-      if (c > maxcode)
-	return -1;
-      from.next += 2;
+      if (c <= maxcode)
+	from.next += 2;
       return c;
     }
     else if (c1 < 0xF0) // 3-byte sequence
@@ -92,9 +172,8 @@ namespace
       if ((c3 & 0xC0) != 0x80)
 	return -1;
       char32_t c = (c1 << 12) + (c2 << 6) + c3 - 0xE2080;
-      if (c > maxcode)
-	return -1;
-      from.next += 3;
+      if (c <= maxcode)
+	from.next += 3;
       return c;
     }
     else if (c1 < 0xF5) // 4-byte sequence
@@ -115,9 +194,8 @@ namespace
       if ((c4 & 0xC0) != 0x80)
 	return -1;
       char32_t c = (c1 << 18) + (c2 << 12) + (c3 << 6) + c4 - 0x3C82080;
-      if (c > maxcode)
-	return -1;
-      from.next += 4;
+      if (c <= maxcode)
+	from.next += 4;
       return c;
     }
     else // > U+10FFFF
@@ -162,9 +240,48 @@ namespace
     return true;
   }
 
+  inline char16_t
+  adjust_byte_order(char16_t c, codecvt_mode mode)
+  {
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+    return (mode & little_endian) ? __builtin_bswap16(c) : c;
+#else
+    return (mode & little_endian) ? c : __builtin_bswap16(c);
+#endif
+  }
+
+  // Read a codepoint from a UTF-16 multibyte sequence.
+  // The sequence's endianness is indicated by (mode & little_endian).
+  // Updates from.next if the codepoint is not greater than maxcode.
+  // Returns -1 if there is an incomplete multibyte character.
+  char32_t
+  read_utf16_code_point(range<const char16_t>& from, unsigned long maxcode,
+			codecvt_mode mode)
+  {
+    int inc = 1;
+    char32_t c = adjust_byte_order(from.next[0], mode);
+    if (c >= 0xD800 && c <= 0xDBFF)
+      {
+	if (from.size() < 2)
+	  return -1;
+	const char16_t c2 = adjust_byte_order(from.next[1], mode);
+	if (c2 >= 0xDC00 && c2 <= 0xDFFF)
+	  {
+	    c = (c << 10) + c2 - 0x35FDC00;
+	    inc = 2;
+	  }
+      }
+    if (c <= maxcode)
+      from.next += inc;
+    return c;
+  }
+
+  template<typename C>
   bool
-  write_utf16_code_point(range<char16_t>& to, char32_t codepoint)
+  write_utf16_code_point(range<C>& to, char32_t codepoint, codecvt_mode mode)
   {
+    static_assert(sizeof(C) >= 2, "a code unit must be at least 16-bit");
+
     if (codepoint < max_single_utf16_unit)
       {
 	if (to.size() > 0)
@@ -183,8 +300,8 @@ namespace
 	char16_t trail = 0xDC00 + (codepoint & 0x3FF);
 	char32_t utf16bytes = (lead << 10) + trail + SURROGATE_OFFSET;
 
-	to.next[0] = utf16bytes >> 16;
-	to.next[1] = utf16bytes & 0xFFFF;
+	to.next[0] = adjust_byte_order(utf16bytes >> 16, mode);
+	to.next[1] = adjust_byte_order(utf16bytes & 0xFFFF, mode);
 	to.next += 2;
 	return true;
       }
@@ -194,12 +311,15 @@ namespace
   // utf8 -> ucs4
   codecvt_base::result
   ucs4_in(range<const char>& from, range<char32_t>& to,
-          unsigned long maxcode = max_code_point)
+          unsigned long maxcode = max_code_point, codecvt_mode mode = {})
   {
+    read_utf8_bom(from, mode);
     while (from.size() && to.size())
       {
 	const char32_t codepoint = read_utf8_code_point(from, maxcode);
-	if (codepoint == char32_t(-1) || codepoint > maxcode)
+	if (codepoint == char32_t(-1))
+	  break;
+	if (codepoint > maxcode)
 	  return codecvt_base::error;
 	*to.next++ = codepoint;
       }
@@ -209,8 +329,10 @@ namespace
   // ucs4 -> utf8
   codecvt_base::result
   ucs4_out(range<const char32_t>& from, range<char>& to,
-           unsigned long maxcode = max_code_point)
+           unsigned long maxcode = max_code_point, codecvt_mode mode = {})
   {
+    if (!write_utf8_bom(to, mode))
+      return codecvt_base::partial;
     while (from.size())
       {
 	const char32_t c = from.next[0];
@@ -223,20 +345,62 @@ namespace
     return codecvt_base::ok;
   }
 
+  // utf16 -> ucs4
+  codecvt_base::result
+  ucs4_in(range<const char16_t>& from, range<char32_t>& to,
+          unsigned long maxcode = max_code_point, codecvt_mode mode = {})
+  {
+    if (read_utf16_bom(from, mode) == little_endian)
+      mode = codecvt_mode(mode & little_endian);
+    while (from.size() && to.size())
+      {
+	const char32_t codepoint = read_utf16_code_point(from, maxcode, mode);
+	if (codepoint == char32_t(-1))
+	  break;
+	if (codepoint > maxcode)
+	  return codecvt_base::error;
+	*to.next++ = codepoint;
+      }
+    return from.size() ? codecvt_base::partial : codecvt_base::ok;
+  }
+
+  // ucs4 -> utf16
+  codecvt_base::result
+  ucs4_out(range<const char32_t>& from, range<char16_t>& to,
+           unsigned long maxcode = max_code_point, codecvt_mode mode = {})
+  {
+    if (!write_utf16_bom(to, mode))
+      return codecvt_base::partial;
+    while (from.size())
+      {
+	const char32_t c = from.next[0];
+	if (c > maxcode)
+	  return codecvt_base::error;
+	if (!write_utf16_code_point(to, c, mode))
+	  return codecvt_base::partial;
+	++from.next;
+      }
+    return codecvt_base::ok;
+  }
+
   // utf8 -> utf16
+  template<typename C>
   codecvt_base::result
-  utf16_in(range<const char>& from, range<char16_t>& to,
-           unsigned long maxcode = max_code_point)
+  utf16_in(range<const char>& from, range<C>& to,
+           unsigned long maxcode = max_code_point, codecvt_mode mode = {})
   {
+    read_utf8_bom(from, mode);
     while (from.size() && to.size())
       {
 	const char* first = from.next;
 	if ((unsigned char)*first >= 0xF0 && to.size() < 2)
 	  return codecvt_base::partial;
 	const char32_t codepoint = read_utf8_code_point(from, maxcode);
-	if (codepoint == char32_t(-1) || codepoint > maxcode)
+	if (codepoint == char32_t(-1))
+	  return codecvt_base::partial;
+	if (codepoint > maxcode)
 	  return codecvt_base::error;
-	if (!write_utf16_code_point(to, codepoint))
+	if (!write_utf16_code_point(to, codepoint, {}))
 	  {
 	    from.next = first;
 	    return codecvt_base::partial;
@@ -246,15 +410,18 @@ namespace
   }
 
   // utf16 -> utf8
+  template<typename C>
   codecvt_base::result
-  utf16_out(range<const char16_t>& from, range<char>& to,
-            unsigned long maxcode = max_code_point)
+  utf16_out(range<const C>& from, range<char>& to,
+            unsigned long maxcode = max_code_point, codecvt_mode mode = {})
   {
+    if (!write_utf8_bom(to, mode))
+      return codecvt_base::partial;
     while (from.size())
       {
 	char32_t c = from.next[0];
 	int inc = 1;
-	if (c >= 0xD800 && c < 0xDBFF) // start of surrogate pair
+	if (c >= 0xD800 && c <= 0xDBFF) // start of surrogate pair
 	  {
 	    if (from.size() < 2)
 	      return codecvt_base::ok; // stop converting at this point
@@ -278,11 +445,12 @@ namespace
   }
 
   // return pos such that [begin,pos) is valid UTF-16 string no longer than max
-  int
-  utf16_len(const char* begin, const char* end, size_t max,
-            char32_t maxcode = max_code_point)
+  const char*
+  utf16_span(const char* begin, const char* end, size_t max,
+	     char32_t maxcode = max_code_point, codecvt_mode mode = {})
   {
     range<const char> from{ begin, end };
+    read_utf8_bom(from, mode);
     size_t count = 0;
     while (count+1 < max)
       {
@@ -295,24 +463,117 @@ namespace
       }
     if (count+1 == max) // take one more character if it fits in a single unit
       read_utf8_code_point(from, std::max(max_single_utf16_unit, maxcode));
-    return from.next - begin;
+    return from.next;
   }
 
-  // return pos such that [begin,pos) is valid UCS-4 string no longer than max
-  int
-  ucs4_len(const char* begin, const char* end, size_t max,
-            char32_t maxcode = max_code_point)
+  // utf8 -> ucs2
+  codecvt_base::result
+  ucs2_in(range<const char>& from, range<char16_t>& to,
+	  char32_t maxcode = max_code_point, codecvt_mode mode = {})
   {
-    range<const char> from{ begin, end };
-    size_t count = 0;
-    while (count < max)
+    return utf16_in(from, to, std::max(max_single_utf16_unit, maxcode), mode);
+  }
+
+  // ucs2 -> utf8
+  codecvt_base::result
+  ucs2_out(range<const char16_t>& from, range<char>& to,
+	   char32_t maxcode = max_code_point, codecvt_mode mode = {})
+  {
+    return utf16_out(from, to, std::max(max_single_utf16_unit, maxcode), mode);
+  }
+
+  // ucs2 -> utf16
+  codecvt_base::result
+  ucs2_out(range<const char16_t>& from, range<char16_t>& to,
+	   char32_t maxcode = max_code_point, codecvt_mode mode = {})
+  {
+    if (!write_utf16_bom(to, mode))
+      return codecvt_base::partial;
+    while (from.size() && to.size())
       {
-	char32_t c = read_utf8_code_point(from, maxcode);
+	char16_t c = from.next[0];
+	if (c >= 0xD800 && c <= 0xDBFF) // start of surrogate pair
+	  return codecvt_base::error;
+	if (c > maxcode)
+	  return codecvt_base::error;
+	*to.next++ = adjust_byte_order(c, mode);
+	++from.next;
+      }
+    return from.size() == 0 ? codecvt_base::ok : codecvt_base::partial;
+  }
+
+  // utf16 -> ucs2
+  codecvt_base::result
+  ucs2_in(range<const char16_t>& from, range<char16_t>& to,
+	  char32_t maxcode = max_code_point, codecvt_mode mode = {})
+  {
+    if (read_utf16_bom(from, mode) == little_endian)
+      mode = codecvt_mode(mode & little_endian);
+    maxcode = std::max(max_single_utf16_unit, maxcode);
+    while (from.size() && to.size())
+      {
+	const char32_t c = read_utf16_code_point(from, maxcode, mode);
 	if (c == char32_t(-1))
 	  break;
-	++count;
+	if (c >= maxcode)
+	  return codecvt_base::error;
+	*to.next++ = c;
       }
-    return from.next - begin;
+    return from.size() == 0 ? codecvt_base::ok : codecvt_base::partial;
+  }
+
+  const char16_t*
+  ucs2_span(const char16_t* begin, const char16_t* end, size_t max,
+            char32_t maxcode, codecvt_mode mode)
+  {
+    range<const char16_t> from{ begin, end };
+    if (read_utf16_bom(from, mode) == little_endian)
+      mode = codecvt_mode(mode & little_endian);
+    maxcode = std::max(max_single_utf16_unit, maxcode);
+    char32_t c = 0;
+    while (max-- && c <= maxcode)
+      c = read_utf16_code_point(from, maxcode, mode);
+    return from.next;
+  }
+
+  const char*
+  ucs2_span(const char* begin, const char* end, size_t max,
+            char32_t maxcode, codecvt_mode mode)
+  {
+    range<const char> from{ begin, end };
+    read_utf8_bom(from, mode);
+    maxcode = std::max(max_single_utf16_unit, maxcode);
+    char32_t c = 0;
+    while (max-- && c <= maxcode)
+      c = read_utf8_code_point(from, maxcode);
+    return from.next;
+  }
+
+  // return pos such that [begin,pos) is valid UCS-4 string no longer than max
+  const char*
+  ucs4_span(const char* begin, const char* end, size_t max,
+            char32_t maxcode = max_code_point, codecvt_mode mode = {})
+  {
+    range<const char> from{ begin, end };
+    read_utf8_bom(from, mode);
+    char32_t c = 0;
+    while (max-- && c <= maxcode)
+      c = read_utf8_code_point(from, maxcode);
+    return from.next;
+  }
+
+  // return pos such that [begin,pos) is valid UCS-4 string no longer than max
+  const char16_t*
+  ucs4_span(const char16_t* begin, const char16_t* end, size_t max,
+            char32_t maxcode = max_code_point, codecvt_mode mode = {})
+  {
+    range<const char16_t> from{ begin, end };
+    if (read_utf16_bom(from, mode) == little_endian)
+      mode = codecvt_mode(mode & little_endian);
+    char32_t c = 0;
+    while (max-- && c <= maxcode)
+      c = read_utf16_code_point(from, maxcode, mode);
+    return from.next;
   }
 }
 
@@ -376,7 +637,8 @@ codecvt<char16_t, char, mbstate_t>::
 do_length(state_type&, const extern_type* __from,
 	  const extern_type* __end, size_t __max) const
 {
-  return utf16_len(__from, __end, __max);
+  __end = utf16_span(__from, __end, __max);
+  return __end - __from;
 }
 
 int
@@ -446,13 +708,698 @@ codecvt<char32_t, char, mbstate_t>::
 do_length(state_type&, const extern_type* __from,
 	  const extern_type* __end, size_t __max) const
 {
-  return ucs4_len(__from, __end, __max);
+  __end = ucs4_span(__from, __end, __max);
+  return __end - __from;
 }
 
 int
 codecvt<char32_t, char, mbstate_t>::do_max_length() const throw()
 { return 4; }
 
+// Define members of codecvt_utf8<char16_t> base class implementation.
+// Converts from UTF-8 to UCS-2.
+
+__codecvt_utf8_base<char16_t>::~__codecvt_utf8_base() { }
+
+codecvt_base::result
+__codecvt_utf8_base<char16_t>::
+do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
+       const intern_type*& __from_next,
+       extern_type* __to, extern_type* __to_end,
+       extern_type*& __to_next) const
+{
+  range<const char16_t> from{ __from, __from_end };
+  range<char> to{ __to, __to_end };
+  auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
+  __from_next = from.next;
+  __to_next = to.next;
+  return res;
+}
+
+codecvt_base::result
+__codecvt_utf8_base<char16_t>::
+do_unshift(state_type&, extern_type* __to, extern_type*,
+	   extern_type*& __to_next) const
+{
+  __to_next = __to;
+  return noconv;
+}
+
+codecvt_base::result
+__codecvt_utf8_base<char16_t>::
+do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
+      const extern_type*& __from_next,
+      intern_type* __to, intern_type* __to_end,
+      intern_type*& __to_next) const
+{
+  range<const char> from{ __from, __from_end };
+  range<char16_t> to{ __to, __to_end };
+  auto res = ucs2_in(from, to, _M_maxcode, _M_mode);
+  __from_next = from.next;
+  __to_next = to.next;
+  return res;
+}
+
+int
+__codecvt_utf8_base<char16_t>::do_encoding() const throw()
+{ return 0; }
+
+bool
+__codecvt_utf8_base<char16_t>::do_always_noconv() const throw()
+{ return false; }
+
+int
+__codecvt_utf8_base<char16_t>::
+do_length(state_type&, const extern_type* __from,
+	  const extern_type* __end, size_t __max) const
+{
+  __end = ucs2_span(__from, __end, __max, _M_maxcode, _M_mode);
+  return __end - __from;
+}
+
+int
+__codecvt_utf8_base<char16_t>::do_max_length() const throw()
+{ return 3; }
+
+// Define members of codecvt_utf8<char32_t> base class implementation.
+// Converts from UTF-8 to UTF-32 (aka UCS-4).
+
+__codecvt_utf8_base<char32_t>::~__codecvt_utf8_base() { }
+
+codecvt_base::result
+__codecvt_utf8_base<char32_t>::
+do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
+       const intern_type*& __from_next,
+       extern_type* __to, extern_type* __to_end,
+       extern_type*& __to_next) const
+{
+  range<const char32_t> from{ __from, __from_end };
+  range<char> to{ __to, __to_end };
+  auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
+  __from_next = from.next;
+  __to_next = to.next;
+  return res;
+}
+
+codecvt_base::result
+__codecvt_utf8_base<char32_t>::
+do_unshift(state_type&, extern_type* __to, extern_type*,
+	   extern_type*& __to_next) const
+{
+  __to_next = __to;
+  return noconv;
+}
+
+codecvt_base::result
+__codecvt_utf8_base<char32_t>::
+do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
+      const extern_type*& __from_next,
+      intern_type* __to, intern_type* __to_end,
+      intern_type*& __to_next) const
+{
+  range<const char> from{ __from, __from_end };
+  range<char32_t> to{ __to, __to_end };
+  auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
+  __from_next = from.next;
+  __to_next = to.next;
+  return res;
+}
+
+int
+__codecvt_utf8_base<char32_t>::do_encoding() const throw()
+{ return 0; }
+
+bool
+__codecvt_utf8_base<char32_t>::do_always_noconv() const throw()
+{ return false; }
+
+int
+__codecvt_utf8_base<char32_t>::
+do_length(state_type&, const extern_type* __from,
+	  const extern_type* __end, size_t __max) const
+{
+  __end = ucs4_span(__from, __end, __max, _M_maxcode, _M_mode);
+  return __end - __from;
+}
+
+int
+__codecvt_utf8_base<char32_t>::do_max_length() const throw()
+{ return 4; }
+
+#ifdef _GLIBCXX_USE_WCHAR_T
+// Define members of codecvt_utf8<wchar_t> base class implementation.
+// Converts from UTF-8 to UCS-2 or UCS-4 depending on sizeof(wchar_t).
+
+__codecvt_utf8_base<wchar_t>::~__codecvt_utf8_base() { }
+
+codecvt_base::result
+__codecvt_utf8_base<wchar_t>::
+do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
+       const intern_type*& __from_next,
+       extern_type* __to, extern_type* __to_end,
+       extern_type*& __to_next) const
+{
+  range<char> to{ __to, __to_end };
+#if __SIZEOF_WCHAR_T__ == 2
+  range<const char16_t> from{
+    reinterpret_cast<const char16_t*>(__from),
+    reinterpret_cast<const char16_t*>(__from_end)
+  };
+  auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
+#elif __SIZEOF_WCHAR_T__ == 4
+  range<const char32_t> from{
+    reinterpret_cast<const char32_t*>(__from),
+    reinterpret_cast<const char32_t*>(__from_end)
+  };
+  auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
+#else
+  return codecvt_base::error;
+#endif
+  __from_next = reinterpret_cast<const wchar_t*>(from.next);
+  __to_next = to.next;
+  return res;
+}
+
+codecvt_base::result
+__codecvt_utf8_base<wchar_t>::
+do_unshift(state_type&, extern_type* __to, extern_type*,
+	   extern_type*& __to_next) const
+{
+  __to_next = __to;
+  return noconv;
+}
+
+codecvt_base::result
+__codecvt_utf8_base<wchar_t>::
+do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
+      const extern_type*& __from_next,
+      intern_type* __to, intern_type* __to_end,
+      intern_type*& __to_next) const
+{
+  range<const char> from{ __from, __from_end };
+#if __SIZEOF_WCHAR_T__ == 2
+  range<char16_t> to{
+    reinterpret_cast<char16_t*>(__to),
+    reinterpret_cast<char16_t*>(__to_end)
+  };
+  auto res = ucs2_in(from, to, _M_maxcode, _M_mode);
+#elif __SIZEOF_WCHAR_T__ == 4
+  range<char32_t> to{
+    reinterpret_cast<char32_t*>(__to),
+    reinterpret_cast<char32_t*>(__to_end)
+  };
+  auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
+#else
+  return codecvt_base::error;
+#endif
+  __from_next = from.next;
+  __to_next = reinterpret_cast<wchar_t*>(to.next);
+  return res;
+}
+
+int
+__codecvt_utf8_base<wchar_t>::do_encoding() const throw()
+{ return 0; }
+
+bool
+__codecvt_utf8_base<wchar_t>::do_always_noconv() const throw()
+{ return false; }
+
+int
+__codecvt_utf8_base<wchar_t>::
+do_length(state_type&, const extern_type* __from,
+	  const extern_type* __end, size_t __max) const
+{
+#if __SIZEOF_WCHAR_T__ == 2
+  __end = ucs2_span(__from, __end, __max, _M_maxcode, _M_mode);
+#elif __SIZEOF_WCHAR_T__ == 4
+  __end = ucs4_span(__from, __end, __max, _M_maxcode, _M_mode);
+#else
+  __end = __from;
+#endif
+  return __end - __from;
+}
+
+int
+__codecvt_utf8_base<wchar_t>::do_max_length() const throw()
+{ return 4; }
+#endif
+
+// Define members of codecvt_utf16<char16_t> base class implementation.
+// Converts from UTF-16 to UCS-2.
+
+__codecvt_utf16_base<char16_t>::~__codecvt_utf16_base() { }
+
+codecvt_base::result
+__codecvt_utf16_base<char16_t>::
+do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
+       const intern_type*& __from_next,
+       extern_type* __to, extern_type* __to_end,
+       extern_type*& __to_next) const
+{
+  range<const char16_t> from{ __from, __from_end };
+  range<char16_t> to{
+    reinterpret_cast<char16_t*>(__to),
+    reinterpret_cast<char16_t*>(__to_end)
+  };
+  auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
+  __from_next = from.next;
+  __to_next = reinterpret_cast<char*>(to.next);
+  return res;
+}
+
+codecvt_base::result
+__codecvt_utf16_base<char16_t>::
+do_unshift(state_type&, extern_type* __to, extern_type*,
+	   extern_type*& __to_next) const
+{
+  __to_next = __to;
+  return noconv;
+}
+
+codecvt_base::result
+__codecvt_utf16_base<char16_t>::
+do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
+      const extern_type*& __from_next,
+      intern_type* __to, intern_type* __to_end,
+      intern_type*& __to_next) const
+{
+  range<const char16_t> from{
+    reinterpret_cast<const char16_t*>(__from),
+    reinterpret_cast<const char16_t*>(__from_end)
+  };
+  range<char16_t> to{ __to, __to_end };
+  auto res = ucs2_in(from, to, _M_maxcode, _M_mode);
+  __from_next = reinterpret_cast<const char*>(from.next);
+  __to_next = to.next;
+  return res;
+}
+
+int
+__codecvt_utf16_base<char16_t>::do_encoding() const throw()
+{ return 1; }
+
+bool
+__codecvt_utf16_base<char16_t>::do_always_noconv() const throw()
+{ return false; }
+
+int
+__codecvt_utf16_base<char16_t>::
+do_length(state_type&, const extern_type* __from,
+	  const extern_type* __end, size_t __max) const
+{
+  auto next = reinterpret_cast<const char16_t*>(__from);
+  next = ucs2_span(next, reinterpret_cast<const char16_t*>(__end), __max,
+		   _M_maxcode, _M_mode);
+  return reinterpret_cast<const char*>(next) - __from;
+}
+
+int
+__codecvt_utf16_base<char16_t>::do_max_length() const throw()
+{ return 3; }
+
+// Define members of codecvt_utf16<char32_t> base class implementation.
+// Converts from UTF-16 to UTF-32 (aka UCS-4).
+
+__codecvt_utf16_base<char32_t>::~__codecvt_utf16_base() { }
+
+codecvt_base::result
+__codecvt_utf16_base<char32_t>::
+do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
+       const intern_type*& __from_next,
+       extern_type* __to, extern_type* __to_end,
+       extern_type*& __to_next) const
+{
+  range<const char32_t> from{ __from, __from_end };
+  range<char16_t> to{
+    reinterpret_cast<char16_t*>(__to),
+    reinterpret_cast<char16_t*>(__to_end)
+  };
+  auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
+  __from_next = from.next;
+  __to_next = reinterpret_cast<char*>(to.next);
+  return res;
+}
+
+codecvt_base::result
+__codecvt_utf16_base<char32_t>::
+do_unshift(state_type&, extern_type* __to, extern_type*,
+	   extern_type*& __to_next) const
+{
+  __to_next = __to;
+  return noconv;
+}
+
+codecvt_base::result
+__codecvt_utf16_base<char32_t>::
+do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
+      const extern_type*& __from_next,
+      intern_type* __to, intern_type* __to_end,
+      intern_type*& __to_next) const
+{
+  range<const char16_t> from{
+    reinterpret_cast<const char16_t*>(__from),
+    reinterpret_cast<const char16_t*>(__from_end)
+  };
+  range<char32_t> to{ __to, __to_end };
+  auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
+  __from_next = reinterpret_cast<const char*>(from.next);
+  __to_next = to.next;
+  return res;
+}
+
+int
+__codecvt_utf16_base<char32_t>::do_encoding() const throw()
+{ return 0; }
+
+bool
+__codecvt_utf16_base<char32_t>::do_always_noconv() const throw()
+{ return false; }
+
+int
+__codecvt_utf16_base<char32_t>::
+do_length(state_type&, const extern_type* __from,
+	  const extern_type* __end, size_t __max) const
+{
+  auto next = reinterpret_cast<const char16_t*>(__from);
+  next = ucs4_span(next, reinterpret_cast<const char16_t*>(__end), __max,
+		   _M_maxcode, _M_mode);
+  return reinterpret_cast<const char*>(next) - __from;
+}
+
+int
+__codecvt_utf16_base<char32_t>::do_max_length() const throw()
+{ return 3; }
+
+#ifdef _GLIBCXX_USE_WCHAR_T
+// Define members of codecvt_utf16<wchar_t> base class implementation.
+// Converts from UTF-8 to UCS-2 or UCS-4 depending on sizeof(wchar_t).
+
+__codecvt_utf16_base<wchar_t>::~__codecvt_utf16_base() { }
+
+codecvt_base::result
+__codecvt_utf16_base<wchar_t>::
+do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
+       const intern_type*& __from_next,
+       extern_type* __to, extern_type* __to_end,
+       extern_type*& __to_next) const
+{
+  range<char> to{ __to, __to_end };
+#if __SIZEOF_WCHAR_T__ == 2
+  range<const char16_t> from{
+    reinterpret_cast<const char16_t*>(__from),
+    reinterpret_cast<const char16_t*>(__from_end)
+  };
+  auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
+#elif __SIZEOF_WCHAR_T__ == 4
+  range<const char32_t> from{
+    reinterpret_cast<const char32_t*>(__from),
+    reinterpret_cast<const char32_t*>(__from_end)
+  };
+  auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
+#else
+  return codecvt_base::error;
+#endif
+  __from_next = reinterpret_cast<const wchar_t*>(from.next);
+  __to_next = to.next;
+  return res;
+}
+
+codecvt_base::result
+__codecvt_utf16_base<wchar_t>::
+do_unshift(state_type&, extern_type* __to, extern_type*,
+	   extern_type*& __to_next) const
+{
+  __to_next = __to;
+  return noconv;
+}
+
+codecvt_base::result
+__codecvt_utf16_base<wchar_t>::
+do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
+      const extern_type*& __from_next,
+      intern_type* __to, intern_type* __to_end,
+      intern_type*& __to_next) const
+{
+  range<const char> from{ __from, __from_end };
+#if __SIZEOF_WCHAR_T__ == 2
+  range<char16_t> to{
+    reinterpret_cast<char16_t*>(__to),
+    reinterpret_cast<char16_t*>(__to_end)
+  };
+  auto res = ucs2_in(from, to, _M_maxcode, _M_mode);
+#elif __SIZEOF_WCHAR_T__ == 4
+  range<char32_t> to{
+    reinterpret_cast<char32_t*>(__to),
+    reinterpret_cast<char32_t*>(__to_end)
+  };
+  auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
+#else
+  return codecvt_base::error;
+#endif
+  __from_next = from.next;
+  __to_next = reinterpret_cast<wchar_t*>(to.next);
+  return res;
+}
+
+int
+__codecvt_utf16_base<wchar_t>::do_encoding() const throw()
+{ return 0; }
+
+bool
+__codecvt_utf16_base<wchar_t>::do_always_noconv() const throw()
+{ return false; }
+
+int
+__codecvt_utf16_base<wchar_t>::
+do_length(state_type&, const extern_type* __from,
+	  const extern_type* __end, size_t __max) const
+{
+  auto next = reinterpret_cast<const char16_t*>(__from);
+#if __SIZEOF_WCHAR_T__ == 2
+  next = ucs2_span(next, reinterpret_cast<const char16_t*>(__end), __max,
+		   _M_maxcode, _M_mode);
+#elif __SIZEOF_WCHAR_T__ == 4
+  next = ucs4_span(next, reinterpret_cast<const char16_t*>(__end), __max,
+		   _M_maxcode, _M_mode);
+#endif
+  return reinterpret_cast<const char*>(next) - __from;
+}
+
+int
+__codecvt_utf16_base<wchar_t>::do_max_length() const throw()
+{ return 4; }
+#endif
+
+// Define members of codecvt_utf8_utf16<char16_t> base class implementation.
+// Converts from UTF-8 to UTF-16.
+
+__codecvt_utf8_utf16_base<char16_t>::~__codecvt_utf8_utf16_base() { }
+
+codecvt_base::result
+__codecvt_utf8_utf16_base<char16_t>::
+do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
+       const intern_type*& __from_next,
+       extern_type* __to, extern_type* __to_end,
+       extern_type*& __to_next) const
+{
+  range<const char16_t> from{ __from, __from_end };
+  range<char> to{ __to, __to_end };
+  auto res = utf16_out(from, to, _M_maxcode, _M_mode);
+  __from_next = from.next;
+  __to_next = to.next;
+  return res;
+}
+
+codecvt_base::result
+__codecvt_utf8_utf16_base<char16_t>::
+do_unshift(state_type&, extern_type* __to, extern_type*,
+	   extern_type*& __to_next) const
+{
+  __to_next = __to;
+  return noconv;
+}
+
+codecvt_base::result
+__codecvt_utf8_utf16_base<char16_t>::
+do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
+      const extern_type*& __from_next,
+      intern_type* __to, intern_type* __to_end,
+      intern_type*& __to_next) const
+{
+  range<const char> from{ __from, __from_end };
+  range<char16_t> to{ __to, __to_end };
+  auto res = utf16_in(from, to, _M_maxcode, _M_mode);
+  __from_next = from.next;
+  __to_next = to.next;
+  return res;
+}
+
+int
+__codecvt_utf8_utf16_base<char16_t>::do_encoding() const throw()
+{ return 0; }
+
+bool
+__codecvt_utf8_utf16_base<char16_t>::do_always_noconv() const throw()
+{ return false; }
+
+int
+__codecvt_utf8_utf16_base<char16_t>::
+do_length(state_type&, const extern_type* __from,
+	  const extern_type* __end, size_t __max) const
+{
+  __end = utf16_span(__from, __end, __max, _M_maxcode, _M_mode);
+  return __end - __from;
+}
+
+int
+__codecvt_utf8_utf16_base<char16_t>::do_max_length() const throw()
+{
+  // Any valid UTF-8 sequence of 3 bytes fits in a single 16-bit code unit,
+  // whereas 4 byte sequences require two 16-bit code units.
+  return 3;
+}
+
+// Define members of codecvt_utf8_utf16<char32_t> base class implementation.
+// Converts from UTF-8 to UTF-16.
+
+__codecvt_utf8_utf16_base<char32_t>::~__codecvt_utf8_utf16_base() { }
+
+codecvt_base::result
+__codecvt_utf8_utf16_base<char32_t>::
+do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
+       const intern_type*& __from_next,
+       extern_type* __to, extern_type* __to_end,
+       extern_type*& __to_next) const
+{
+  range<const char32_t> from{ __from, __from_end };
+  range<char> to{ __to, __to_end };
+  auto res = utf16_out(from, to, _M_maxcode, _M_mode);
+  __from_next = from.next;
+  __to_next = to.next;
+  return res;
+}
+
+codecvt_base::result
+__codecvt_utf8_utf16_base<char32_t>::
+do_unshift(state_type&, extern_type* __to, extern_type*,
+	   extern_type*& __to_next) const
+{
+  __to_next = __to;
+  return noconv;
+}
+
+codecvt_base::result
+__codecvt_utf8_utf16_base<char32_t>::
+do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
+      const extern_type*& __from_next,
+      intern_type* __to, intern_type* __to_end,
+      intern_type*& __to_next) const
+{
+  range<const char> from{ __from, __from_end };
+  range<char32_t> to{ __to, __to_end };
+  auto res = utf16_in(from, to, _M_maxcode, _M_mode);
+  __from_next = from.next;
+  __to_next = to.next;
+  return res;
+}
+
+int
+__codecvt_utf8_utf16_base<char32_t>::do_encoding() const throw()
+{ return 0; }
+
+bool
+__codecvt_utf8_utf16_base<char32_t>::do_always_noconv() const throw()
+{ return false; }
+
+int
+__codecvt_utf8_utf16_base<char32_t>::
+do_length(state_type&, const extern_type* __from,
+	  const extern_type* __end, size_t __max) const
+{
+  __end = utf16_span(__from, __end, __max, _M_maxcode, _M_mode);
+  return __end - __from;
+}
+
+int
+__codecvt_utf8_utf16_base<char32_t>::do_max_length() const throw()
+{
+  // Any valid UTF-8 sequence of 3 bytes fits in a single 16-bit code unit,
+  // whereas 4 byte sequences require two 16-bit code units.
+  return 3;
+}
+
+#ifdef _GLIBCXX_USE_WCHAR_T
+// Define members of codecvt_utf8_utf16<wchar_t> base class implementation.
+// Converts from UTF-8 to UTF-16.
+
+__codecvt_utf8_utf16_base<wchar_t>::~__codecvt_utf8_utf16_base() { }
+
+codecvt_base::result
+__codecvt_utf8_utf16_base<wchar_t>::
+do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
+       const intern_type*& __from_next,
+       extern_type* __to, extern_type* __to_end,
+       extern_type*& __to_next) const
+{
+  range<const wchar_t> from{ __from, __from_end };
+  range<char> to{ __to, __to_end };
+  auto res = utf16_out(from, to, _M_maxcode, _M_mode);
+  __from_next = from.next;
+  __to_next = to.next;
+  return res;
+}
+
+codecvt_base::result
+__codecvt_utf8_utf16_base<wchar_t>::
+do_unshift(state_type&, extern_type* __to, extern_type*,
+	   extern_type*& __to_next) const
+{
+  __to_next = __to;
+  return noconv;
+}
+
+codecvt_base::result
+__codecvt_utf8_utf16_base<wchar_t>::
+do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
+      const extern_type*& __from_next,
+      intern_type* __to, intern_type* __to_end,
+      intern_type*& __to_next) const
+{
+  range<const char> from{ __from, __from_end };
+  range<wchar_t> to{ __to, __to_end };
+  auto res = utf16_in(from, to, _M_maxcode, _M_mode);
+  __from_next = from.next;
+  __to_next = to.next;
+  return res;
+}
+
+int
+__codecvt_utf8_utf16_base<wchar_t>::do_encoding() const throw()
+{ return 0; }
+
+bool
+__codecvt_utf8_utf16_base<wchar_t>::do_always_noconv() const throw()
+{ return false; }
+
+int
+__codecvt_utf8_utf16_base<wchar_t>::
+do_length(state_type&, const extern_type* __from,
+	  const extern_type* __end, size_t __max) const
+{
+  __end = utf16_span(__from, __end, __max, _M_maxcode, _M_mode);
+  return __end - __from;
+}
+
+int
+__codecvt_utf8_utf16_base<wchar_t>::do_max_length() const throw()
+{
+  // Any valid UTF-8 sequence of 3 bytes fits in a single 16-bit code unit,
+  // whereas 4 byte sequences require two 16-bit code units.
+  return 3;
+}
+#endif
+
 inline template class __codecvt_abstract_base<char16_t, char, mbstate_t>;
 inline template class __codecvt_abstract_base<char32_t, char, mbstate_t>;
author	Jonathan Wakely <jwakely@redhat.com>	2015-01-16 23:38:35 +0000
committer	Jonathan Wakely <redi@gcc.gnu.org>	2015-01-16 23:38:35 +0000
commit	28af1fb39dfbf903ccafeafda927d280fd8768d8 (patch)
tree	8c798f7195e89e40829e60d3b8f2b3e02730a5ea /libstdc++-v3/src
parent	0f59f5c12003e547e7557cc09da39c2abd63403d (diff)
download	gcc-28af1fb39dfbf903ccafeafda927d280fd8768d8.zip gcc-28af1fb39dfbf903ccafeafda927d280fd8768d8.tar.gz gcc-28af1fb39dfbf903ccafeafda927d280fd8768d8.tar.bz2