diff options
author | Neil Booth <neil@daikokuya.co.uk> | 2003-04-20 07:29:23 +0000 |
---|---|---|
committer | Neil Booth <neil@gcc.gnu.org> | 2003-04-20 07:29:23 +0000 |
commit | 1613e52bdd61cfd2e00fb326c5cfef8e07f8c797 (patch) | |
tree | 01f484dfa9250821cfb9113a0a797ca2bec80e59 | |
parent | 0a45ec5c78e634e129fe3e9bbcddf6b721a10ca8 (diff) | |
download | gcc-1613e52bdd61cfd2e00fb326c5cfef8e07f8c797.zip gcc-1613e52bdd61cfd2e00fb326c5cfef8e07f8c797.tar.gz gcc-1613e52bdd61cfd2e00fb326c5cfef8e07f8c797.tar.bz2 |
Makefile.in (LIBCPP_OBJS): Add cppcharset.o.
* Makefile.in (LIBCPP_OBJS): Add cppcharset.o.
(cppcharset.o): New target.
* c-lex.c (is_extended_char): Move to cppcharset.c.
(utf8_extend_token): Delete.
* cppcharset.c: New file.
* cpphash.h (_cpp_valid_ucn): New.
* cpplex.c (lex_identifier): Update prototype.
(continues_identifier_p): Rename forms_identifier_p. Handle UCN
escapes.
(maybe_read_ucs): Rename maybe_read_ucn. Update to use code
in cppcharset.c.
(lex_number, lex_identifier, cpp_parse_escape): Update.
(_cpp_lex_direct): Update to handle UCNs.
(cpp_avoid_paste): Don't paste to form a UCN.
testsuite:
* ucs.c: Update diagnostic messages.
From-SVN: r65845
-rw-r--r-- | gcc/ChangeLog | 17 | ||||
-rw-r--r-- | gcc/Makefile.in | 3 | ||||
-rw-r--r-- | gcc/c-lex.c | 309 | ||||
-rw-r--r-- | gcc/cppcharset.c | 591 | ||||
-rw-r--r-- | gcc/cpphash.h | 4 | ||||
-rw-r--r-- | gcc/cpplex.c | 193 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/cpp/ucs.c | 8 |
8 files changed, 704 insertions, 425 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 8d73923..b4c3a77 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,20 @@ +2003-04-20 Neil Booth <neil@daikokuya.co.uk> + + * Makefile.in (LIBCPP_OBJS): Add cppcharset.o. + (cppcharset.o): New target. + * c-lex.c (is_extended_char): Move to cppcharset.c. + (utf8_extend_token): Delete. + * cppcharset.c: New file. + * cpphash.h (_cpp_valid_ucn): New. + * cpplex.c (lex_identifier): Update prototype. + (continues_identifier_p): Rename forms_identifier_p. Handle UCN + escapes. + (maybe_read_ucs): Rename maybe_read_ucn. Update to use code + in cppcharset.c. + (lex_number, lex_identifier, cpp_parse_escape): Update. + (_cpp_lex_direct): Update to handle UCNs. + (cpp_avoid_paste): Don't paste to form a UCN. + 2003-04-19 Roger Sayle <roger@eyesopen.com> * builtins.c (expand_builtin): Don't expand a pure or const diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 32a2b44..ceff352 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -2320,7 +2320,7 @@ PREPROCESSOR_DEFINES = \ @TARGET_SYSTEM_ROOT_DEFINE@ LIBCPP_OBJS = cpplib.o cpplex.o cppmacro.o cppexp.o cppfiles.o cpptrad.o \ - cpphash.o cpperror.o cppinit.o \ + cpphash.o cpperror.o cppinit.o cppcharset.o \ hashtable.o line-map.o mkdeps.o mbchar.o cpppch.o LIBCPP_DEPS = $(CPPLIB_H) cpphash.h line-map.h hashtable.h intl.h \ @@ -2333,6 +2333,7 @@ libcpp.a: $(LIBCPP_OBJS) $(AR) $(AR_FLAGS) libcpp.a $(LIBCPP_OBJS) -$(RANLIB) libcpp.a +cppcharset.o: cppcharset.c $(LIBCPP_DEPS) cpperror.o: cpperror.c $(LIBCPP_DEPS) cppexp.o: cppexp.c $(LIBCPP_DEPS) cpplex.o: cpplex.c $(LIBCPP_DEPS) mbchar.h diff --git a/gcc/c-lex.c b/gcc/c-lex.c index 95419c5..ec16c15 100644 --- a/gcc/c-lex.c +++ b/gcc/c-lex.c @@ -322,315 +322,6 @@ cb_undef (pfile, line, node) (*debug_hooks->undef) (SOURCE_LINE (map, line), (const char *) NODE_NAME (node)); } - -#if 0 /* not yet */ -/* Returns nonzero if C is a universal-character-name. Give an error if it - is not one which may appear in an identifier, as per [extendid]. - - Note that extended character support in identifiers has not yet been - implemented. It is my personal opinion that this is not a desirable - feature. Portable code cannot count on support for more than the basic - identifier character set. */ - -static inline int -is_extended_char (c) - int c; -{ -#ifdef TARGET_EBCDIC - return 0; -#else - /* ASCII. */ - if (c < 0x7f) - return 0; - - /* None of the valid chars are outside the Basic Multilingual Plane (the - low 16 bits). */ - if (c > 0xffff) - { - error ("universal-character-name '\\U%08x' not valid in identifier", c); - return 1; - } - - /* Latin */ - if ((c >= 0x00c0 && c <= 0x00d6) - || (c >= 0x00d8 && c <= 0x00f6) - || (c >= 0x00f8 && c <= 0x01f5) - || (c >= 0x01fa && c <= 0x0217) - || (c >= 0x0250 && c <= 0x02a8) - || (c >= 0x1e00 && c <= 0x1e9a) - || (c >= 0x1ea0 && c <= 0x1ef9)) - return 1; - - /* Greek */ - if ((c == 0x0384) - || (c >= 0x0388 && c <= 0x038a) - || (c == 0x038c) - || (c >= 0x038e && c <= 0x03a1) - || (c >= 0x03a3 && c <= 0x03ce) - || (c >= 0x03d0 && c <= 0x03d6) - || (c == 0x03da) - || (c == 0x03dc) - || (c == 0x03de) - || (c == 0x03e0) - || (c >= 0x03e2 && c <= 0x03f3) - || (c >= 0x1f00 && c <= 0x1f15) - || (c >= 0x1f18 && c <= 0x1f1d) - || (c >= 0x1f20 && c <= 0x1f45) - || (c >= 0x1f48 && c <= 0x1f4d) - || (c >= 0x1f50 && c <= 0x1f57) - || (c == 0x1f59) - || (c == 0x1f5b) - || (c == 0x1f5d) - || (c >= 0x1f5f && c <= 0x1f7d) - || (c >= 0x1f80 && c <= 0x1fb4) - || (c >= 0x1fb6 && c <= 0x1fbc) - || (c >= 0x1fc2 && c <= 0x1fc4) - || (c >= 0x1fc6 && c <= 0x1fcc) - || (c >= 0x1fd0 && c <= 0x1fd3) - || (c >= 0x1fd6 && c <= 0x1fdb) - || (c >= 0x1fe0 && c <= 0x1fec) - || (c >= 0x1ff2 && c <= 0x1ff4) - || (c >= 0x1ff6 && c <= 0x1ffc)) - return 1; - - /* Cyrillic */ - if ((c >= 0x0401 && c <= 0x040d) - || (c >= 0x040f && c <= 0x044f) - || (c >= 0x0451 && c <= 0x045c) - || (c >= 0x045e && c <= 0x0481) - || (c >= 0x0490 && c <= 0x04c4) - || (c >= 0x04c7 && c <= 0x04c8) - || (c >= 0x04cb && c <= 0x04cc) - || (c >= 0x04d0 && c <= 0x04eb) - || (c >= 0x04ee && c <= 0x04f5) - || (c >= 0x04f8 && c <= 0x04f9)) - return 1; - - /* Armenian */ - if ((c >= 0x0531 && c <= 0x0556) - || (c >= 0x0561 && c <= 0x0587)) - return 1; - - /* Hebrew */ - if ((c >= 0x05d0 && c <= 0x05ea) - || (c >= 0x05f0 && c <= 0x05f4)) - return 1; - - /* Arabic */ - if ((c >= 0x0621 && c <= 0x063a) - || (c >= 0x0640 && c <= 0x0652) - || (c >= 0x0670 && c <= 0x06b7) - || (c >= 0x06ba && c <= 0x06be) - || (c >= 0x06c0 && c <= 0x06ce) - || (c >= 0x06e5 && c <= 0x06e7)) - return 1; - - /* Devanagari */ - if ((c >= 0x0905 && c <= 0x0939) - || (c >= 0x0958 && c <= 0x0962)) - return 1; - - /* Bengali */ - if ((c >= 0x0985 && c <= 0x098c) - || (c >= 0x098f && c <= 0x0990) - || (c >= 0x0993 && c <= 0x09a8) - || (c >= 0x09aa && c <= 0x09b0) - || (c == 0x09b2) - || (c >= 0x09b6 && c <= 0x09b9) - || (c >= 0x09dc && c <= 0x09dd) - || (c >= 0x09df && c <= 0x09e1) - || (c >= 0x09f0 && c <= 0x09f1)) - return 1; - - /* Gurmukhi */ - if ((c >= 0x0a05 && c <= 0x0a0a) - || (c >= 0x0a0f && c <= 0x0a10) - || (c >= 0x0a13 && c <= 0x0a28) - || (c >= 0x0a2a && c <= 0x0a30) - || (c >= 0x0a32 && c <= 0x0a33) - || (c >= 0x0a35 && c <= 0x0a36) - || (c >= 0x0a38 && c <= 0x0a39) - || (c >= 0x0a59 && c <= 0x0a5c) - || (c == 0x0a5e)) - return 1; - - /* Gujarati */ - if ((c >= 0x0a85 && c <= 0x0a8b) - || (c == 0x0a8d) - || (c >= 0x0a8f && c <= 0x0a91) - || (c >= 0x0a93 && c <= 0x0aa8) - || (c >= 0x0aaa && c <= 0x0ab0) - || (c >= 0x0ab2 && c <= 0x0ab3) - || (c >= 0x0ab5 && c <= 0x0ab9) - || (c == 0x0ae0)) - return 1; - - /* Oriya */ - if ((c >= 0x0b05 && c <= 0x0b0c) - || (c >= 0x0b0f && c <= 0x0b10) - || (c >= 0x0b13 && c <= 0x0b28) - || (c >= 0x0b2a && c <= 0x0b30) - || (c >= 0x0b32 && c <= 0x0b33) - || (c >= 0x0b36 && c <= 0x0b39) - || (c >= 0x0b5c && c <= 0x0b5d) - || (c >= 0x0b5f && c <= 0x0b61)) - return 1; - - /* Tamil */ - if ((c >= 0x0b85 && c <= 0x0b8a) - || (c >= 0x0b8e && c <= 0x0b90) - || (c >= 0x0b92 && c <= 0x0b95) - || (c >= 0x0b99 && c <= 0x0b9a) - || (c == 0x0b9c) - || (c >= 0x0b9e && c <= 0x0b9f) - || (c >= 0x0ba3 && c <= 0x0ba4) - || (c >= 0x0ba8 && c <= 0x0baa) - || (c >= 0x0bae && c <= 0x0bb5) - || (c >= 0x0bb7 && c <= 0x0bb9)) - return 1; - - /* Telugu */ - if ((c >= 0x0c05 && c <= 0x0c0c) - || (c >= 0x0c0e && c <= 0x0c10) - || (c >= 0x0c12 && c <= 0x0c28) - || (c >= 0x0c2a && c <= 0x0c33) - || (c >= 0x0c35 && c <= 0x0c39) - || (c >= 0x0c60 && c <= 0x0c61)) - return 1; - - /* Kannada */ - if ((c >= 0x0c85 && c <= 0x0c8c) - || (c >= 0x0c8e && c <= 0x0c90) - || (c >= 0x0c92 && c <= 0x0ca8) - || (c >= 0x0caa && c <= 0x0cb3) - || (c >= 0x0cb5 && c <= 0x0cb9) - || (c >= 0x0ce0 && c <= 0x0ce1)) - return 1; - - /* Malayalam */ - if ((c >= 0x0d05 && c <= 0x0d0c) - || (c >= 0x0d0e && c <= 0x0d10) - || (c >= 0x0d12 && c <= 0x0d28) - || (c >= 0x0d2a && c <= 0x0d39) - || (c >= 0x0d60 && c <= 0x0d61)) - return 1; - - /* Thai */ - if ((c >= 0x0e01 && c <= 0x0e30) - || (c >= 0x0e32 && c <= 0x0e33) - || (c >= 0x0e40 && c <= 0x0e46) - || (c >= 0x0e4f && c <= 0x0e5b)) - return 1; - - /* Lao */ - if ((c >= 0x0e81 && c <= 0x0e82) - || (c == 0x0e84) - || (c == 0x0e87) - || (c == 0x0e88) - || (c == 0x0e8a) - || (c == 0x0e0d) - || (c >= 0x0e94 && c <= 0x0e97) - || (c >= 0x0e99 && c <= 0x0e9f) - || (c >= 0x0ea1 && c <= 0x0ea3) - || (c == 0x0ea5) - || (c == 0x0ea7) - || (c == 0x0eaa) - || (c == 0x0eab) - || (c >= 0x0ead && c <= 0x0eb0) - || (c == 0x0eb2) - || (c == 0x0eb3) - || (c == 0x0ebd) - || (c >= 0x0ec0 && c <= 0x0ec4) - || (c == 0x0ec6)) - return 1; - - /* Georgian */ - if ((c >= 0x10a0 && c <= 0x10c5) - || (c >= 0x10d0 && c <= 0x10f6)) - return 1; - - /* Hiragana */ - if ((c >= 0x3041 && c <= 0x3094) - || (c >= 0x309b && c <= 0x309e)) - return 1; - - /* Katakana */ - if ((c >= 0x30a1 && c <= 0x30fe)) - return 1; - - /* Bopmofo */ - if ((c >= 0x3105 && c <= 0x312c)) - return 1; - - /* Hangul */ - if ((c >= 0x1100 && c <= 0x1159) - || (c >= 0x1161 && c <= 0x11a2) - || (c >= 0x11a8 && c <= 0x11f9)) - return 1; - - /* CJK Unified Ideographs */ - if ((c >= 0xf900 && c <= 0xfa2d) - || (c >= 0xfb1f && c <= 0xfb36) - || (c >= 0xfb38 && c <= 0xfb3c) - || (c == 0xfb3e) - || (c >= 0xfb40 && c <= 0xfb41) - || (c >= 0xfb42 && c <= 0xfb44) - || (c >= 0xfb46 && c <= 0xfbb1) - || (c >= 0xfbd3 && c <= 0xfd3f) - || (c >= 0xfd50 && c <= 0xfd8f) - || (c >= 0xfd92 && c <= 0xfdc7) - || (c >= 0xfdf0 && c <= 0xfdfb) - || (c >= 0xfe70 && c <= 0xfe72) - || (c == 0xfe74) - || (c >= 0xfe76 && c <= 0xfefc) - || (c >= 0xff21 && c <= 0xff3a) - || (c >= 0xff41 && c <= 0xff5a) - || (c >= 0xff66 && c <= 0xffbe) - || (c >= 0xffc2 && c <= 0xffc7) - || (c >= 0xffca && c <= 0xffcf) - || (c >= 0xffd2 && c <= 0xffd7) - || (c >= 0xffda && c <= 0xffdc) - || (c >= 0x4e00 && c <= 0x9fa5)) - return 1; - - error ("universal-character-name '\\u%04x' not valid in identifier", c); - return 1; -#endif -} - -/* Add the UTF-8 representation of C to the token_buffer. */ - -static void -utf8_extend_token (c) - int c; -{ - int shift, mask; - - if (c <= 0x0000007f) - { - extend_token (c); - return; - } - else if (c <= 0x000007ff) - shift = 6, mask = 0xc0; - else if (c <= 0x0000ffff) - shift = 12, mask = 0xe0; - else if (c <= 0x001fffff) - shift = 18, mask = 0xf0; - else if (c <= 0x03ffffff) - shift = 24, mask = 0xf8; - else - shift = 30, mask = 0xfc; - - extend_token (mask | (c >> shift)); - do - { - shift -= 6; - extend_token ((unsigned char) (0x80 | (c >> shift))); - } - while (shift); -} -#endif int c_lex (value) diff --git a/gcc/cppcharset.c b/gcc/cppcharset.c new file mode 100644 index 0000000..900e451 --- /dev/null +++ b/gcc/cppcharset.c @@ -0,0 +1,591 @@ +/* CPP Library - charsets + Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003 + Free Software Foundation, Inc. + + Broken out of c-lex.c Apr 2003, adding valid C99 UCN ranges. + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "cpplib.h" +#include "cpphash.h" + +static int ucn_valid_in_identifier PARAMS ((cpp_reader *, cppchar_t)); + +/* [lex.charset]: The character designated by the universal character + name \UNNNNNNNN is that character whose character short name in + ISO/IEC 10646 is NNNNNNNN; the character designated by the + universal character name \uNNNN is that character whose character + short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value + for a universal character name is less than 0x20 or in the range + 0x7F-0x9F (inclusive), or if the universal character name + designates a character in the basic source character set, then the + program is ill-formed. + + *PSTR must be preceded by "\u" or "\U"; it is assumed that the + buffer end is delimited by a non-hex digit. Returns zero if UCNs + are not part of the relevant standard, or if the string beginning + at *PSTR doesn't syntactically match the form 'NNNN' or 'NNNNNNNN'. + + Otherwise the non-zero value of the UCN, whether valid or invalid, + is returned. Diagnostics are emitted for invalid values. PSTR + is updated to point one beyond the UCN, or to the syntactically + invalid character. + + IDENTIFIER_POS is 0 when not in an identifier, 1 for the start of + an identifier, or 2 otherwise. +*/ + +cppchar_t +_cpp_valid_ucn (pfile, pstr, identifier_pos) + cpp_reader *pfile; + const uchar **pstr; + int identifier_pos; +{ + cppchar_t result, c; + unsigned int length; + const uchar *str = *pstr; + const uchar *base = str - 2; + + /* Only attempt to interpret a UCS for C++ and C99. */ + if (!CPP_OPTION (pfile, cplusplus) && !CPP_OPTION (pfile, c99)) + return 0; + + /* We don't accept UCNs for an EBCDIC target. */ + if (CPP_OPTION (pfile, EBCDIC)) + return 0; + + if (str[-1] == 'u') + length = 4; + else if (str[-1] == 'U') + length = 8; + else + abort(); + + result = 0; + do + { + c = *str; + if (!ISXDIGIT (c)) + break; + str++; + result = (result << 4) + hex_value (c); + } + while (--length); + + *pstr = str; + if (length) + /* We'll error when we try it out as the start of an identifier. */ + cpp_error (pfile, DL_ERROR, "incomplete universal character name %.*s", + str - base, base); + /* The standard permits $, @ and ` to be specified as UCNs. We use + hex escapes so that this also works with EBCDIC hosts. */ + else if ((result < 0xa0 + && (result != 0x24 && result != 0x40 && result != 0x60)) + || (result & 0x80000000) + || (result >= 0xD800 && result <= 0xDFFF)) + { + cpp_error (pfile, DL_ERROR, "%.*s is not a valid universal character", + str - base, base); + } + else if (identifier_pos) + { + int validity = ucn_valid_in_identifier (pfile, result); + + if (validity == 0) + cpp_error (pfile, DL_ERROR, + "universal character %.*s is not valid in an identifier", + str - base, base); + else if (validity == 2 && identifier_pos == 1) + cpp_error (pfile, DL_ERROR, + "universal character %.*s is not valid at the start of an identifier", + str - base, base); + } + + if (result == 0) + result = 1; + + return result; +} + +/* Returns 1 if C is valid in an identifier, 2 if C is valid except at + the start of an identifier, and 0 if C is not valid in an + identifier. We assume C has already gone through the checks of + _cpp_valid_ucn. */ +static int +ucn_valid_in_identifier (pfile, c) + cpp_reader *pfile; + cppchar_t c; +{ + /* None of the valid chars are outside the Basic Multilingual Plane (the + low 16 bits). */ + if (c > 0xffff) + return 0; + + if (CPP_OPTION (pfile, c99) || !CPP_PEDANTIC (pfile)) + { + /* Latin. */ + if (c == 0x0aa || c == 0x00ba || c == 0x207f || c == 0x1e9b) + return 1; + + /* Greek. */ + if (c == 0x0386) + return 1; + + /* Cyrillic. */ + if (c == 0x040c) + return 1; + + /* Hebrew. */ + if ((c >= 0x05b0 && c <= 0x05b9) + || (c >= 0x05bb && c <= 0x005bd) + || c == 0x05bf + || (c >= 0x05c1 && c <= 0x05c2)) + return 1; + + /* Arabic. */ + if ((c >= 0x06d0 && c <= 0x06dc) + || c == 0x06e8 + || (c >= 0x06ea && c <= 0x06ed)) + return 1; + + /* Devanagari */ + if ((c >= 0x0901 && c <= 0x0903) + || (c >= 0x093e && c <= 0x094d) + || (c >= 0x0950 && c <= 0x0952) + || c == 0x0963) + return 1; + + /* Bengali */ + if ((c >= 0x0981 && c <= 0x0983) + || (c >= 0x09be && c <= 0x09c4) + || (c >= 0x09c7 && c <= 0x09c8) + || (c >= 0x09cb && c <= 0x09cd) + || (c >= 0x09e2 && c <= 0x09e3)) + return 1; + + /* Gurmukhi */ + if (c == 0x0a02 + || (c >= 0x0a3e && c <= 0x0a42) + || (c >= 0x0a47 && c <= 0x0a48) + || (c >= 0x0a4b && c <= 0x0a4d) + || (c == 0x0a74)) + return 1; + + /* Gujarati */ + if ((c >= 0x0a81 && c <= 0x0a83) + || (c >= 0x0abd && c <= 0x0ac5) + || (c >= 0x0ac7 && c <= 0x0ac9) + || (c >= 0x0acb && c <= 0x0acd) + || (c == 0x0ad0)) + return 1; + + /* Oriya */ + if ((c >= 0x0b01 && c <= 0x0b03) + || (c >= 0x0b3e && c <= 0x0b43) + || (c >= 0x0b47 && c <= 0x0b48) + || (c >= 0x0b4b && c <= 0x0b4d)) + return 1; + + /* Tamil */ + if ((c >= 0x0b82 && c <= 0x0b83) + || (c >= 0x0bbe && c <= 0x0bc2) + || (c >= 0x0bc6 && c <= 0x0bc8) + || (c >= 0x0bc8 && c <= 0x0bcd)) + return 1; + + /* Telugu */ + if ((c >= 0x0c01 && c <= 0x0c03) + || (c >= 0x0c3e && c <= 0x0c44) + || (c >= 0x0c46 && c <= 0x0c48) + || (c >= 0x0c4a && c <= 0x0c4d)) + return 1; + + /* Kannada */ + if ((c >= 0x0c82 && c <= 0x0c83) + || (c >= 0x0cbe && c <= 0x0cc4) + || (c >= 0x0cc6 && c <= 0x0cc8) + || (c >= 0x0cca && c <= 0x0ccd) + || c == 0x0cde) + return 1; + + /* Malayalam */ + if ((c >= 0x0d02 && c <= 0x0d03) + || (c >= 0x0d3e && c <= 0x0d43) + || (c >= 0x0d46 && c <= 0x0d48) + || (c >= 0x0d4a && c <= 0x0d4d)) + return 1; + + /* Thai */ + if ((c >= 0x0e01 && c <= 0x0e3a) + || (c >= 0x0e40 && c <= 0x0e5b)) + return 1; + + /* Lao */ + if ((c >= 0x0ead && c <= 0x0eae) + || (c >= 0x0eb0 && c <= 0x0eb9) + || (c >= 0x0ebb && c <= 0x0ebd) + || (c >= 0x0ec0 && c <= 0x0ec4) + || c == 0x0ec6 + || (c >= 0x0ec8 && c <= 0x0ecd) + || (c >= 0x0edc && c <= 0x0ed)) + return 1; + + /* Tibetan. */ + if (c == 0x0f00 + || (c >= 0x0f18 && c <= 0x0f19) + || c == 0x0f35 + || c == 0x0f37 + || c == 0x0f39 + || (c >= 0x0f3e && c <= 0x0f47) + || (c >= 0x0f49 && c <= 0x0f69) + || (c >= 0x0f71 && c <= 0x0f84) + || (c >= 0x0f86 && c <= 0x0f8b) + || (c >= 0x0f90 && c <= 0x0f95) + || c == 0x0f97 + || (c >= 0x0f99 && c <= 0x0fad) + || (c >= 0x0fb1 && c <= 0x0fb7) + || c == 0x0fb9) + return 1; + + /* Katakana */ + if ((c >= 0x30a1 && c <= 0x30f6) + || (c >= 0x30fb && c <= 0x30fc)) + return 1; + + /* CJK Unified Ideographs. */ + if (c >= 0x4e00 && c <= 0x9fa5) + return 1; + + /* Hangul. */ + if (c >= 0xac00 && c <= 0xd7a3) + return 1; + + /* Digits. */ + if ((c >= 0x0660 && c <= 0x0669) + || (c >= 0x06f0 && c <= 0x06f9) + || (c >= 0x0966 && c <= 0x096f) + || (c >= 0x09e6 && c <= 0x09ef) + || (c >= 0x0a66 && c <= 0x0a6f) + || (c >= 0x0ae6 && c <= 0x0aef) + || (c >= 0x0b66 && c <= 0x0b6f) + || (c >= 0x0be7 && c <= 0x0bef) + || (c >= 0x0c66 && c <= 0x0c6f) + || (c >= 0x0ce6 && c <= 0x0cef) + || (c >= 0x0d66 && c <= 0x0d6f) + || (c >= 0x0e50 && c <= 0x0e59) + || (c >= 0x0ed0 && c <= 0x0ed9) + || (c >= 0x0f20 && c <= 0x0f33)) + return 2; + + /* Special characters. */ + if (c == 0x00b5 + || c == 0x00b7 + || (c >= 0x02b0 && c <= 0x02b8) + || c == 0x02bb + || (c >= 0x02bd && c <= 0x02c1) + || (c >= 0x02d0 && c <= 0x02d1) + || (c >= 0x02e0 && c <= 0x02e4) + || c == 0x037a + || c == 0x0559 + || c == 0x093d + || c == 0x0b3d + || c == 0x1fbe + || (c >= 0x203f && c <= 0x2040) + || c == 0x2102 + || c == 0x2107 + || (c >= 0x210a && c <= 0x2113) + || c == 0x2115 + || (c >= 0x2118 && c <= 0x211d) + || c == 0x2124 + || c == 0x2126 + || c == 0x2128 + || (c >= 0x212a && c <= 0x2131) + || (c >= 0x2133 && c <= 0x2138) + || (c >= 0x2160 && c <= 0x2182) + || (c >= 0x3005 && c <= 0x3007) + || (c >= 0x3021 && c <= 0x3029)) + return 1; + } + + if (CPP_OPTION (pfile, cplusplus) || !CPP_PEDANTIC (pfile)) + { + /* Greek. */ + if (c == 0x0384) + return 1; + + /* Cyrillic. */ + if (c == 0x040d) + return 1; + + /* Hebrew. */ + if (c >= 0x05f3 && c <= 0x05f4) + return 1; + + /* Lao. */ + if ((c >= 0x0ead && c <= 0x0eb0) + || (c == 0x0eb2) + || (c == 0x0eb3) + || (c == 0x0ebd) + || (c >= 0x0ec0 && c <= 0x0ec4) + || (c == 0x0ec6)) + return 1; + + /* Hiragana */ + if (c == 0x3094 + || (c >= 0x309d && c <= 0x309e)) + return 1; + + /* Katakana */ + if ((c >= 0x30a1 && c <= 0x30fe)) + return 1; + + /* Hangul */ + if ((c >= 0x1100 && c <= 0x1159) + || (c >= 0x1161 && c <= 0x11a2) + || (c >= 0x11a8 && c <= 0x11f9)) + return 1; + + /* CJK Unified Ideographs */ + if ((c >= 0xf900 && c <= 0xfa2d) + || (c >= 0xfb1f && c <= 0xfb36) + || (c >= 0xfb38 && c <= 0xfb3c) + || (c == 0xfb3e) + || (c >= 0xfb40 && c <= 0xfb41) + || (c >= 0xfb42 && c <= 0xfb44) + || (c >= 0xfb46 && c <= 0xfbb1) + || (c >= 0xfbd3 && c <= 0xfd3f) + || (c >= 0xfd50 && c <= 0xfd8f) + || (c >= 0xfd92 && c <= 0xfdc7) + || (c >= 0xfdf0 && c <= 0xfdfb) + || (c >= 0xfe70 && c <= 0xfe72) + || (c == 0xfe74) + || (c >= 0xfe76 && c <= 0xfefc) + || (c >= 0xff21 && c <= 0xff3a) + || (c >= 0xff41 && c <= 0xff5a) + || (c >= 0xff66 && c <= 0xffbe) + || (c >= 0xffc2 && c <= 0xffc7) + || (c >= 0xffca && c <= 0xffcf) + || (c >= 0xffd2 && c <= 0xffd7) + || (c >= 0xffda && c <= 0xffdc) + || (c >= 0x4e00 && c <= 0x9fa5)) + return 1; + } + + /* Latin */ + if ((c >= 0x00c0 && c <= 0x00d6) + || (c >= 0x00d8 && c <= 0x00f6) + || (c >= 0x00f8 && c <= 0x01f5) + || (c >= 0x01fa && c <= 0x0217) + || (c >= 0x0250 && c <= 0x02a8) + || (c >= 0x1e00 && c <= 0x1e9a) + || (c >= 0x1ea0 && c <= 0x1ef9)) + return 1; + + /* Greek */ + if ((c >= 0x0388 && c <= 0x038a) + || (c == 0x038c) + || (c >= 0x038e && c <= 0x03a1) + || (c >= 0x03a3 && c <= 0x03ce) + || (c >= 0x03d0 && c <= 0x03d6) + || (c == 0x03da) + || (c == 0x03dc) + || (c == 0x03de) + || (c == 0x03e0) + || (c >= 0x03e2 && c <= 0x03f3) + || (c >= 0x1f00 && c <= 0x1f15) + || (c >= 0x1f18 && c <= 0x1f1d) + || (c >= 0x1f20 && c <= 0x1f45) + || (c >= 0x1f48 && c <= 0x1f4d) + || (c >= 0x1f50 && c <= 0x1f57) + || (c == 0x1f59) + || (c == 0x1f5b) + || (c == 0x1f5d) + || (c >= 0x1f5f && c <= 0x1f7d) + || (c >= 0x1f80 && c <= 0x1fb4) + || (c >= 0x1fb6 && c <= 0x1fbc) + || (c >= 0x1fc2 && c <= 0x1fc4) + || (c >= 0x1fc6 && c <= 0x1fcc) + || (c >= 0x1fd0 && c <= 0x1fd3) + || (c >= 0x1fd6 && c <= 0x1fdb) + || (c >= 0x1fe0 && c <= 0x1fec) + || (c >= 0x1ff2 && c <= 0x1ff4) + || (c >= 0x1ff6 && c <= 0x1ffc)) + return 1; + + /* Cyrillic */ + if ((c >= 0x0401 && c <= 0x040c) + || (c >= 0x040f && c <= 0x044f) + || (c >= 0x0451 && c <= 0x045c) + || (c >= 0x045e && c <= 0x0481) + || (c >= 0x0490 && c <= 0x04c4) + || (c >= 0x04c7 && c <= 0x04c8) + || (c >= 0x04cb && c <= 0x04cc) + || (c >= 0x04d0 && c <= 0x04eb) + || (c >= 0x04ee && c <= 0x04f5) + || (c >= 0x04f8 && c <= 0x04f9)) + return 1; + + /* Armenian */ + if ((c >= 0x0531 && c <= 0x0556) + || (c >= 0x0561 && c <= 0x0587)) + return 1; + + /* Hebrew */ + if ((c >= 0x05d0 && c <= 0x05ea) + || (c >= 0x05f0 && c <= 0x05f2)) + return 1; + + /* Arabic */ + if ((c >= 0x0621 && c <= 0x063a) + || (c >= 0x0640 && c <= 0x0652) + || (c >= 0x0670 && c <= 0x06b7) + || (c >= 0x06ba && c <= 0x06be) + || (c >= 0x06c0 && c <= 0x06ce) + || (c >= 0x06e5 && c <= 0x06e7)) + return 1; + + /* Devanagari */ + if ((c >= 0x0905 && c <= 0x0939) + || (c >= 0x0958 && c <= 0x0962)) + return 1; + + /* Bengali */ + if ((c >= 0x0985 && c <= 0x098c) + || (c >= 0x098f && c <= 0x0990) + || (c >= 0x0993 && c <= 0x09a8) + || (c >= 0x09aa && c <= 0x09b0) + || (c == 0x09b2) + || (c >= 0x09b6 && c <= 0x09b9) + || (c >= 0x09dc && c <= 0x09dd) + || (c >= 0x09df && c <= 0x09e1) + || (c >= 0x09f0 && c <= 0x09f1)) + return 1; + + /* Gurmukhi */ + if ((c >= 0x0a05 && c <= 0x0a0a) + || (c >= 0x0a0f && c <= 0x0a10) + || (c >= 0x0a13 && c <= 0x0a28) + || (c >= 0x0a2a && c <= 0x0a30) + || (c >= 0x0a32 && c <= 0x0a33) + || (c >= 0x0a35 && c <= 0x0a36) + || (c >= 0x0a38 && c <= 0x0a39) + || (c >= 0x0a59 && c <= 0x0a5c) + || (c == 0x0a5e)) + return 1; + + /* Gujarati */ + if ((c >= 0x0a85 && c <= 0x0a8b) + || (c == 0x0a8d) + || (c >= 0x0a8f && c <= 0x0a91) + || (c >= 0x0a93 && c <= 0x0aa8) + || (c >= 0x0aaa && c <= 0x0ab0) + || (c >= 0x0ab2 && c <= 0x0ab3) + || (c >= 0x0ab5 && c <= 0x0ab9) + || (c == 0x0ae0)) + return 1; + + /* Oriya */ + if ((c >= 0x0b05 && c <= 0x0b0c) + || (c >= 0x0b0f && c <= 0x0b10) + || (c >= 0x0b13 && c <= 0x0b28) + || (c >= 0x0b2a && c <= 0x0b30) + || (c >= 0x0b32 && c <= 0x0b33) + || (c >= 0x0b36 && c <= 0x0b39) + || (c >= 0x0b5c && c <= 0x0b5d) + || (c >= 0x0b5f && c <= 0x0b61)) + return 1; + + /* Tamil */ + if ((c >= 0x0b85 && c <= 0x0b8a) + || (c >= 0x0b8e && c <= 0x0b90) + || (c >= 0x0b92 && c <= 0x0b95) + || (c >= 0x0b99 && c <= 0x0b9a) + || (c == 0x0b9c) + || (c >= 0x0b9e && c <= 0x0b9f) + || (c >= 0x0ba3 && c <= 0x0ba4) + || (c >= 0x0ba8 && c <= 0x0baa) + || (c >= 0x0bae && c <= 0x0bb5) + || (c >= 0x0bb7 && c <= 0x0bb9)) + return 1; + + /* Telugu */ + if ((c >= 0x0c05 && c <= 0x0c0c) + || (c >= 0x0c0e && c <= 0x0c10) + || (c >= 0x0c12 && c <= 0x0c28) + || (c >= 0x0c2a && c <= 0x0c33) + || (c >= 0x0c35 && c <= 0x0c39) + || (c >= 0x0c60 && c <= 0x0c61)) + return 1; + + /* Kannada */ + if ((c >= 0x0c85 && c <= 0x0c8c) + || (c >= 0x0c8e && c <= 0x0c90) + || (c >= 0x0c92 && c <= 0x0ca8) + || (c >= 0x0caa && c <= 0x0cb3) + || (c >= 0x0cb5 && c <= 0x0cb9) + || (c >= 0x0ce0 && c <= 0x0ce1)) + return 1; + + /* Malayalam */ + if ((c >= 0x0d05 && c <= 0x0d0c) + || (c >= 0x0d0e && c <= 0x0d10) + || (c >= 0x0d12 && c <= 0x0d28) + || (c >= 0x0d2a && c <= 0x0d39) + || (c >= 0x0d60 && c <= 0x0d61)) + return 1; + + /* Thai */ + if ((c >= 0x0e01 && c <= 0x0e30) + || (c >= 0x0e32 && c <= 0x0e33) + || (c >= 0x0e40 && c <= 0x0e46) + || (c >= 0x0e4f && c <= 0x0e5b)) + return 1; + + /* Lao */ + if ((c >= 0x0e81 && c <= 0x0e82) + || (c == 0x0e84) + || (c == 0x0e87) + || (c == 0x0e88) + || (c == 0x0e8a) + || (c == 0x0e8d) + || (c >= 0x0e94 && c <= 0x0e97) + || (c >= 0x0e99 && c <= 0x0e9f) + || (c >= 0x0ea1 && c <= 0x0ea3) + || (c == 0x0ea5) + || (c == 0x0ea7) + || (c == 0x0eaa) + || (c == 0x0eab)) + return 1; + + /* Georgian */ + if ((c >= 0x10a0 && c <= 0x10c5) + || (c >= 0x10d0 && c <= 0x10f6)) + return 1; + + /* Hiragana */ + if ((c >= 0x3041 && c <= 0x3093) + || (c >= 0x309b && c <= 0x309c)) + return 1; + + /* Bopmofo */ + if ((c >= 0x3105 && c <= 0x312c)) + return 1; + + return 0; +} diff --git a/gcc/cpphash.h b/gcc/cpphash.h index f1ee4a6..4eee942 100644 --- a/gcc/cpphash.h +++ b/gcc/cpphash.h @@ -555,6 +555,10 @@ extern bool _cpp_expansions_different_trad PARAMS ((const cpp_macro *, extern uchar *_cpp_copy_replacement_text PARAMS ((const cpp_macro *, uchar *)); extern size_t _cpp_replacement_text_len PARAMS ((const cpp_macro *)); +/* In cppcharset.c. */ +cppchar_t _cpp_valid_ucn PARAMS ((cpp_reader *, const uchar **, + int identifer_p)); + /* Utility routines and macros. */ #define DSC(str) (const uchar *)str, sizeof str - 1 #define xnew(T) (T *) xmalloc (sizeof(T)) diff --git a/gcc/cpplex.c b/gcc/cpplex.c index c8caa39..41e8a00 100644 --- a/gcc/cpplex.c +++ b/gcc/cpplex.c @@ -59,15 +59,14 @@ static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE }; static void add_line_note PARAMS ((cpp_buffer *, const uchar *, unsigned int)); static int skip_line_comment PARAMS ((cpp_reader *)); static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t)); -static cpp_hashnode *lex_identifier PARAMS ((cpp_reader *)); +static cpp_hashnode *lex_identifier PARAMS ((cpp_reader *, const uchar *)); static void lex_number PARAMS ((cpp_reader *, cpp_string *)); -static bool continues_identifier_p PARAMS ((cpp_reader *)); +static bool forms_identifier_p PARAMS ((cpp_reader *, int)); static void lex_string PARAMS ((cpp_reader *, cpp_token *)); static void save_comment PARAMS ((cpp_reader *, cpp_token *, const uchar *, cppchar_t)); static int name_p PARAMS ((cpp_reader *, const cpp_string *)); -static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **, - const unsigned char *, cppchar_t *)); +static cppchar_t maybe_read_ucn PARAMS ((cpp_reader *, const uchar **)); static tokenrun *next_tokenrun PARAMS ((tokenrun *)); static unsigned int hex_digit_value PARAMS ((unsigned int)); @@ -361,33 +360,53 @@ name_p (pfile, string) } /* Returns TRUE if the sequence starting at buffer->cur is invalid in - an identifier. */ + an identifier. FIRST is TRUE if this starts an identifier. */ static bool -continues_identifier_p (pfile) +forms_identifier_p (pfile, first) cpp_reader *pfile; + int first; { - if (*pfile->buffer->cur != '$' || !CPP_OPTION (pfile, dollars_in_ident)) - return false; + cpp_buffer *buffer = pfile->buffer; - if (CPP_PEDANTIC (pfile) && !pfile->state.skipping && !pfile->warned_dollar) + if (*buffer->cur == '$') { - pfile->warned_dollar = true; - cpp_error (pfile, DL_PEDWARN, "'$' in identifier or number"); + if (!CPP_OPTION (pfile, dollars_in_ident)) + return false; + + buffer->cur++; + if (CPP_PEDANTIC (pfile) + && !pfile->state.skipping + && !pfile->warned_dollar) + { + pfile->warned_dollar = true; + cpp_error (pfile, DL_PEDWARN, "'$' in identifier or number"); + } + + return true; } - pfile->buffer->cur++; - return true; + /* Is this a syntactically valid UCN? */ + if (0 && *buffer->cur == '\\' + && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U')) + { + buffer->cur += 2; + if (_cpp_valid_ucn (pfile, &buffer->cur, 1 + !first)) + return true; + buffer->cur -= 2; + } + + return false; } /* Lex an identifier starting at BUFFER->CUR - 1. */ static cpp_hashnode * -lex_identifier (pfile) +lex_identifier (pfile, base) cpp_reader *pfile; + const uchar *base; { cpp_hashnode *result; - const uchar *cur, *base; + const uchar *cur; - base = pfile->buffer->cur - 1; do { cur = pfile->buffer->cur; @@ -398,7 +417,7 @@ lex_identifier (pfile) pfile->buffer->cur = cur; } - while (continues_identifier_p (pfile)); + while (forms_identifier_p (pfile, false)); result = (cpp_hashnode *) ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC); @@ -444,7 +463,7 @@ lex_number (pfile, number) pfile->buffer->cur = cur; } - while (continues_identifier_p (pfile)); + while (forms_identifier_p (pfile, false)); number->len = cur - base; dest = _cpp_unaligned_alloc (pfile, number->len + 1); @@ -803,7 +822,6 @@ _cpp_lex_direct (pfile) } /* Fall through. */ - start_ident: case '_': case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': @@ -816,7 +834,7 @@ _cpp_lex_direct (pfile) case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': result->type = CPP_NAME; - result->val.node = lex_identifier (pfile); + result->val.node = lex_identifier (pfile, buffer->cur - 1); /* Convert named operators to their proper types. */ if (result->val.node->flags & NODE_OPERATOR) @@ -1044,14 +1062,23 @@ _cpp_lex_direct (pfile) case '@': result->type = CPP_ATSIGN; break; case '$': - if (CPP_OPTION (pfile, dollars_in_ident)) - goto start_ident; - /* Fall through... */ + case '\\': + { + const uchar *base = --buffer->cur; - default: - result->type = CPP_OTHER; - result->val.c = c; - break; + if (forms_identifier_p (pfile, true)) + { + result->type = CPP_NAME; + result->val.node = lex_identifier (pfile, base); + break; + } + buffer->cur++; + + default: + result->type = CPP_OTHER; + result->val.c = c; + break; + } } return result; @@ -1321,9 +1348,11 @@ cpp_avoid_paste (pfile, token1, token2) || b == CPP_CHAR || b == CPP_STRING); /* L */ case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME || c == '.' || c == '+' || c == '-'); - case CPP_OTHER: return (CPP_OPTION (pfile, objc) - && token1->val.c == '@' - && (b == CPP_NAME || b == CPP_STRING)); + /* UCNs */ + case CPP_OTHER: return ((token1->val.c == '\\' && b == CPP_NAME) + || (CPP_OPTION (pfile, objc) + && token1->val.c == '@' + && (b == CPP_NAME || b == CPP_STRING))); default: break; } @@ -1363,93 +1392,31 @@ hex_digit_value (c) abort (); } -/* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate - failure if cpplib is not parsing C++ or C99. Such failure is - silent, and no variables are updated. Otherwise returns 0, and - warns if -Wtraditional. - - [lex.charset]: The character designated by the universal character - name \UNNNNNNNN is that character whose character short name in - ISO/IEC 10646 is NNNNNNNN; the character designated by the - universal character name \uNNNN is that character whose character - short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value - for a universal character name is less than 0x20 or in the range - 0x7F-0x9F (inclusive), or if the universal character name - designates a character in the basic source character set, then the - program is ill-formed. - - We assume that wchar_t is Unicode, so we don't need to do any - mapping. Is this ever wrong? - - PC points to the 'u' or 'U', PSTR is points to the byte after PC, - LIMIT is the end of the string or charconst. PSTR is updated to - point after the UCS on return, and the UCS is written into PC. */ - -static int -maybe_read_ucs (pfile, pstr, limit, pc) +/* Read a possible universal character name starting at *PSTR. */ +static cppchar_t +maybe_read_ucn (pfile, pstr) cpp_reader *pfile; - const unsigned char **pstr; - const unsigned char *limit; - cppchar_t *pc; + const uchar **pstr; { - const unsigned char *p = *pstr; - unsigned int code = 0; - unsigned int c = *pc, length; - - /* Only attempt to interpret a UCS for C++ and C99. */ - if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99))) - return 1; + cppchar_t result, c = (*pstr)[-1]; - if (CPP_WTRADITIONAL (pfile)) - cpp_error (pfile, DL_WARNING, - "the meaning of '\\%c' is different in traditional C", c); - - length = (c == 'u' ? 4: 8); - - if ((size_t) (limit - p) < length) - { - cpp_error (pfile, DL_ERROR, "incomplete universal-character-name"); - /* Skip to the end to avoid more diagnostics. */ - p = limit; - } - else + result = _cpp_valid_ucn (pfile, pstr, false); + if (result) { - for (; length; length--, p++) + if (CPP_WTRADITIONAL (pfile)) + cpp_error (pfile, DL_WARNING, + "the meaning of '\\%c' is different in traditional C", + (int) c); + + if (CPP_OPTION (pfile, EBCDIC)) { - c = *p; - if (ISXDIGIT (c)) - code = (code << 4) + hex_digit_value (c); - else - { - cpp_error (pfile, DL_ERROR, - "non-hex digit '%c' in universal-character-name", c); - /* We shouldn't skip in case there are multibyte chars. */ - break; - } + cpp_error (pfile, DL_ERROR, + "universal character with an EBCDIC target"); + result = 0x3f; /* EBCDIC invalid character */ } } - if (CPP_OPTION (pfile, EBCDIC)) - { - cpp_error (pfile, DL_ERROR, "universal-character-name on EBCDIC target"); - code = 0x3f; /* EBCDIC invalid character */ - } - /* True extended characters are OK. */ - else if (code >= 0xa0 - && !(code & 0x80000000) - && !(code >= 0xD800 && code <= 0xDFFF)) - ; - /* The standard permits $, @ and ` to be specified as UCNs. We use - hex escapes so that this also works with EBCDIC hosts. */ - else if (code == 0x24 || code == 0x40 || code == 0x60) - ; - /* Don't give another error if one occurred above. */ - else if (length == 0) - cpp_error (pfile, DL_ERROR, "universal-character-name out of range"); - - *pstr = p; - *pc = code; - return 0; + return result; } /* Returns the value of an escape sequence, truncated to the correct @@ -1470,7 +1437,7 @@ cpp_parse_escape (pfile, pstr, limit, wide) int unknown = 0; const unsigned char *str = *pstr, *charconsts; - cppchar_t c, mask; + cppchar_t c, ucn, mask; unsigned int width; if (CPP_OPTION (pfile, EBCDIC)) @@ -1519,7 +1486,11 @@ cpp_parse_escape (pfile, pstr, limit, wide) break; case 'u': case 'U': - unknown = maybe_read_ucs (pfile, &str, limit, &c); + ucn = maybe_read_ucn (pfile, &str); + if (ucn) + c = ucn; + else + unknown = true; break; case 'x': diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 524363f..41df38f 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2003-04-20 Neil Booth <neil@daikokuya.co.uk> + + * ucs.c: Update diagnostic messages. + 2003-04-19 Neil Booth <neil@daikokuya.co.uk> * gcc.dg/cpp/truefalse.cpp: New test. diff --git a/gcc/testsuite/gcc.dg/cpp/ucs.c b/gcc/testsuite/gcc.dg/cpp/ucs.c index 1dfe444..d36e0dc 100644 --- a/gcc/testsuite/gcc.dg/cpp/ucs.c +++ b/gcc/testsuite/gcc.dg/cpp/ucs.c @@ -51,7 +51,7 @@ void foo () c = L'\ubad'; /* { dg-error "incomplete" "incompete UCN 1" } */ c = L"\U1234"[0]; /* { dg-error "incomplete" "incompete UCN 2" } */ - c = L'\u000x'; /* { dg-error "non-hex" "non-hex digit in UCN" } */ + c = L'\u000x'; /* { dg-error "incomplete" "non-hex digit in UCN" } */ /* If sizeof(HOST_WIDE_INT) > sizeof(wchar_t), we can get a multi-character constant warning even for wide characters. */ /* { dg-warning "too long|multi-character" "" { target *-*-* } 54 } */ @@ -61,7 +61,7 @@ void foo () c = '\u00a0'; /* { dg-bogus "invalid" "00a0 is a valid UCN" } */ c = '\U00000060'; /* { dg-bogus "invalid" "0060 is a valid UCN" } */ - c = '\u0025'; /* { dg-error "range" "0025 is an invalid UCN" } */ - c = L"\uD800"[0]; /* { dg-error "range" "D800 is an invalid UCN" } */ - c = L'\U0000DFFF'; /* { dg-error "range" "DFFF is an invalid UCN" } */ + c = '\u0025'; /* { dg-error "not a valid" "0025 invalid UCN" } */ + c = L"\uD800"[0]; /* { dg-error "not a valid" "D800 invalid UCN" } */ + c = L'\U0000DFFF'; /* { dg-error "not a valid" "DFFF invalid UCN" } */ } |