diff options
-rw-r--r-- | gcc/testsuite/ChangeLog | 7 | ||||
-rw-r--r-- | gcc/testsuite/g++.dg/cpp/ucn-1.C | 13 | ||||
-rw-r--r-- | libcpp/ChangeLog | 4 | ||||
-rw-r--r-- | libcpp/charset.c | 21 |
4 files changed, 39 insertions, 6 deletions
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 3875b1d..534d797 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2009-10-09 Jason Merrill <jason@redhat.com> + + * g++.dg/cpp/ucn-1.C: New. + +2009-10-08 Jason Merrill <jason@redhat.com> + 2009-10-09 Janus Weil <janus@gcc.gnu.org> PR fortran/41585 @@ -60,6 +66,7 @@ 2009-10-08 Jason Merrill <jason@redhat.com> + PR c++/36816 * g++.dg/cpp0x/rv-deduce.C: New. PR c++/37177 diff --git a/gcc/testsuite/g++.dg/cpp/ucn-1.C b/gcc/testsuite/g++.dg/cpp/ucn-1.C new file mode 100644 index 0000000..354e1d9 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp/ucn-1.C @@ -0,0 +1,13 @@ +// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2007/n2170.html +// { dg-options "-std=c++0x -fextended-identifiers" } + +int main() +{ + "\u0041"; // 'A' UCN is OK in string literal + '\u0041'; // also OK in character literal + + int c\u0041c; // { dg-error "not valid in an identifier" } + int c\u0024c; // $ is OK; not part of basic source char set + + U"\uD800"; // { dg-error "not a valid universal character" } +} diff --git a/libcpp/ChangeLog b/libcpp/ChangeLog index 4672abe..9d9556e 100644 --- a/libcpp/ChangeLog +++ b/libcpp/ChangeLog @@ -1,3 +1,7 @@ +2009-10-09 Jason Merrill <jason@redhat.com> + + * charset.c (_cpp_valid_ucn): Update C++0x restrictions. + 2009-10-09 Neil Vachharajani <nvachhar@google.com> * directives.c (DIRECTIVE_TABLE): Remove DEPRECATED from ident and diff --git a/libcpp/charset.c b/libcpp/charset.c index b96c646..bd24ec24 100644 --- a/libcpp/charset.c +++ b/libcpp/charset.c @@ -948,10 +948,16 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c, ISO/IEC 10646 is NNNNNNNN; the character designated by the universal character name \uNNNN is that character whose character short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value - for a universal character name is less than 0x20 or in the range - 0x7F-0x9F (inclusive), or if the universal character name - designates a character in the basic source character set, then the - program is ill-formed. + for a universal character name corresponds to a surrogate code point + (in the range 0xD800-0xDFFF, inclusive), the program is ill-formed. + Additionally, if the hexadecimal value for a universal-character-name + outside a character or string literal corresponds to a control character + (in either of the ranges 0x00-0x1F or 0x7F-0x9F, both inclusive) or to a + character in the basic source character set, the program is ill-formed. + + C99 6.4.3: A universal character name shall not specify a character + whose short identifier is less than 00A0 other than 0024 ($), 0040 (@), + or 0060 (`), nor one in the range D800 through DFFF inclusive. *PSTR must be preceded by "\u" or "\U"; it is assumed that the buffer end is delimited by a non-hex digit. Returns zero if the @@ -1018,9 +1024,12 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr, (int) (str - base), base); result = 1; } - /* The standard permits $, @ and ` to be specified as UCNs. We use - hex escapes so that this also works with EBCDIC hosts. */ + /* The C99 standard permits $, @ and ` to be specified as UCNs. We use + hex escapes so that this also works with EBCDIC hosts. + C++0x permits everything below 0xa0 within literals; + ucn_valid_in_identifier will complain about identifiers. */ else if ((result < 0xa0 + && !CPP_OPTION (pfile, cplusplus) && (result != 0x24 && result != 0x40 && result != 0x60)) || (result & 0x80000000) || (result >= 0xD800 && result <= 0xDFFF)) |