about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Joseph Myers <joseph@codesourcery.com> 2009-05-03 12:59:26 +0100
committer: Joseph Myers <jsm28@gcc.gnu.org> 2009-05-03 12:59:26 +0100
commit: 9e322bc1a5bea706d6f48c15ccba1fc9a8841012 (patch)
tree: 1520bfcfc1cca9e314890ea375efe86771b5799e
parent: aea88c05c9e0ffe0a2a4c595a71d023b581595aa (diff)
download: gcc-9e322bc1a5bea706d6f48c15ccba1fc9a8841012.zip
download: gcc-9e322bc1a5bea706d6f48c15ccba1fc9a8841012.tar.gz
download: gcc-9e322bc1a5bea706d6f48c15ccba1fc9a8841012.tar.bz2
charset.c (one_utf8_to_cppchar): Correct mask used for 5-byte UTF-8 sequences.
libcpp:
  * charset.c (one_utf8_to_cppchar): Correct mask used for 5-byte
  UTF-8 sequences.

gcc/testsuite:
  * gcc.dg/cpp/utf8-5byte-1.c: New test.

From-SVN: r147073
-rw-r--r-- gcc/testsuite/ChangeLog 4
-rw-r--r-- gcc/testsuite/gcc.dg/cpp/utf8-5byte-1.c 17
-rw-r--r-- libcpp/ChangeLog 5
-rw-r--r-- libcpp/charset.c 2
4 files changed, 27 insertions, 1 deletion
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index da4f0e2..bdb6f44 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2009-05-03 Joseph Myers <joseph@codesourcery.com>
+
+ * gcc.dg/cpp/utf8-5byte-1.c: New test.
+
2009-05-02 Joseph Myers <joseph@codesourcery.com>
* gcc.dg/ucnid-6.c: Fix typo in dg-do directive.
diff --git a/gcc/testsuite/gcc.dg/cpp/utf8-5byte-1.c b/gcc/testsuite/gcc.dg/cpp/utf8-5byte-1.c
new file mode 100644
index 0000000..7f96a56
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/cpp/utf8-5byte-1.c
@@ -0,0 +1,17 @@
+/* Test for bug in conversions from 5-byte UTF-8 sequences in
+ cpplib. */
+/* { dg-do run { target { 4byte_wchar_t } } } */
+/* { dg-options "-std=gnu99" } */
+
+extern void abort (void);
+extern void exit (int);
+
+__WCHAR_TYPE__ ws[] = L"û¿¿¿¿";
+
+int
+main (void)
+{
+ if (ws[0] != L'\U03FFFFFF' || ws[1] != 0)
+ abort ();
+ exit (0);
+}
diff --git a/libcpp/ChangeLog b/libcpp/ChangeLog
index 24f3f8d..a541b69 100644
--- a/libcpp/ChangeLog
+++ b/libcpp/ChangeLog
@@ -1,3 +1,8 @@
+2009-05-03 Joseph Myers <joseph@codesourcery.com>
+
+ * charset.c (one_utf8_to_cppchar): Correct mask used for 5-byte
+ UTF-8 sequences.
+
2009-04-25 Joseph Myers <joseph@codesourcery.com>
PR preprocessor/39559
diff --git a/libcpp/charset.c b/libcpp/charset.c
index e743b1e..f1da426 100644
--- a/libcpp/charset.c
+++ b/libcpp/charset.c
@@ -169,7 +169,7 @@ static inline int
one_utf8_to_cppchar (const uchar **inbufp, size_t *inbytesleftp,
cppchar_t *cp)
{
- static const uchar masks[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x02, 0x01 };
+ static const uchar masks[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x03, 0x01 };
static const uchar patns[6] = { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
cppchar_t c;