diff options
-rw-r--r-- | gcc/testsuite/ChangeLog | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/cpp/utf8-5byte-1.c | 17 | ||||
-rw-r--r-- | libcpp/ChangeLog | 5 | ||||
-rw-r--r-- | libcpp/charset.c | 2 |
4 files changed, 27 insertions, 1 deletion
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index da4f0e2..bdb6f44 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2009-05-03 Joseph Myers <joseph@codesourcery.com>
+
+	* gcc.dg/cpp/utf8-5byte-1.c: New test.
+
 2009-05-02 Joseph Myers <joseph@codesourcery.com>
 
 	* gcc.dg/ucnid-6.c: Fix typo in dg-do directive.
diff --git a/gcc/testsuite/gcc.dg/cpp/utf8-5byte-1.c b/gcc/testsuite/gcc.dg/cpp/utf8-5byte-1.c
new file mode 100644
index 0000000..7f96a56
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/cpp/utf8-5byte-1.c
@@ -0,0 +1,17 @@
+/* Test for bug in conversions from 5-byte UTF-8 sequences in
+   cpplib. */
+/* { dg-do run { target { 4byte_wchar_t } } } */
+/* { dg-options "-std=gnu99" } */
+
+extern void abort (void);
+extern void exit (int);
+
+__WCHAR_TYPE__ ws[] = L"û¿¿¿¿";
+
+int
+main (void)
+{
+  if (ws[0] != L'\U03FFFFFF' || ws[1] != 0)
+    abort ();
+  exit (0);
+}
diff --git a/libcpp/ChangeLog b/libcpp/ChangeLog
index 24f3f8d..a541b69 100644
--- a/libcpp/ChangeLog
+++ b/libcpp/ChangeLog
@@ -1,3 +1,8 @@
+2009-05-03 Joseph Myers <joseph@codesourcery.com>
+
+	* charset.c (one_utf8_to_cppchar): Correct mask used for 5-byte
+	UTF-8 sequences.
+
 2009-04-25 Joseph Myers <joseph@codesourcery.com>
 
 	PR preprocessor/39559
diff --git a/libcpp/charset.c b/libcpp/charset.c
index e743b1e..f1da426 100644
--- a/libcpp/charset.c
+++ b/libcpp/charset.c
@@ -169,7 +169,7 @@ static inline int
 one_utf8_to_cppchar (const uchar **inbufp, size_t *inbytesleftp,
 		     cppchar_t *cp)
 {
-  static const uchar masks[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x02, 0x01 };
+  static const uchar masks[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x03, 0x01 };
   static const uchar patns[6] = { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
 
   cppchar_t c;