diff options
author | Joseph Myers <joseph@codesourcery.com> | 2013-11-16 00:05:08 +0000 |
---|---|---|
committer | Joseph Myers <jsm28@gcc.gnu.org> | 2013-11-16 00:05:08 +0000 |
commit | d3f4ff8b51b8875a7862fae1fb574735db4bfd36 (patch) | |
tree | ddce12237d1c0fb641f1d74daf972657ec50caca /libcpp/charset.c | |
parent | 3d053a5f72d60cc868defb5108ac0b28bdd9ef4c (diff) | |
download | gcc-d3f4ff8b51b8875a7862fae1fb574735db4bfd36.zip gcc-d3f4ff8b51b8875a7862fae1fb574735db4bfd36.tar.gz gcc-d3f4ff8b51b8875a7862fae1fb574735db4bfd36.tar.bz2 |
ucnid-2011-1.c: New test.
gcc/testsuite:
* c-c++-common/cpp/ucnid-2011-1.c: New test.
libcpp:
* ucnid.tab: Add C11 and C11NOSTART data.
* makeucnid.c (digit): Rename enum value to N99.
(C11, N11, all_languages): New enum values.
(NUM_CODE_POINTS, MAX_CODE_POINT): New macros.
(flags, decomp, combining_value): Use NUM_CODE_POINTS as array
size.
(decomp): Use unsigned int as element type.
(all_decomp): New array.
(read_ucnid): Handle C11 and C11NOSTART. Use MAX_CODE_POINT.
(read_table): Use MAX_CODE_POINT. Store all decompositions in
all_decomp.
(read_derived): Use MAX_CODE_POINT.
(write_table): Use NUM_CODE_POINTS. Print N99, C11 and N11
flags. Print whole array variable declaration rather than just
array contents.
(char_id_valid, write_context_switch): New functions.
(main): Call write_context_switch.
* ucnid.h: Regenerate.
* include/cpplib.h (struct cpp_options): Add c11_identifiers.
* init.c (struct lang_flags): Add c11_identifiers.
(cpp_set_lang): Set c11_identifiers option from selected language.
* internal.h (struct normalize_state): Document "previous" as
previous starter character.
(NORMALIZE_STATE_UPDATE_IDNUM): Take character as argument.
* charset.c (DIG): Rename enum value to N99.
(C11, N11): New enum values.
(struct ucnrange): Give name to struct. Use short for flags and
unsigned int for end of range. Include ucnid.h for whole variable
declaration.
(ucn_valid_in_identifier): Allow for characters up to 0x10FFFF.
Allow for C11 in determining valid characters and valid start
characters. Use check_nfc for non-Hangul context-dependent
checks. Only store starter characters in nst->previous.
(_cpp_valid_ucn): Pass new argument to
NORMALIZE_STATE_UPDATE_IDNUM.
* lex.c (lex_identifier): Pass new argument to
NORMALIZE_STATE_UPDATE_IDNUM. Call NORMALIZE_STATE_UPDATE_IDNUM
after initial non-UCN part of identifier.
(lex_number): Pass new argument to NORMALIZE_STATE_UPDATE_IDNUM.
From-SVN: r204886
Diffstat (limited to 'libcpp/charset.c')
-rw-r--r-- | libcpp/charset.c | 88 |
1 file changed, 46 insertions, 42 deletions
diff --git a/libcpp/charset.c b/libcpp/charset.c index ae56c5a..c48e64a 100644 --- a/libcpp/charset.c +++ b/libcpp/charset.c @@ -828,29 +828,32 @@ enum { /* Valid in a C99 identifier? */ C99 = 1, /* Valid in a C99 identifier, but not as the first character? */ - DIG = 2, + N99 = 2, /* Valid in a C++ identifier? */ CXX = 4, + /* Valid in a C11/C++11 identifier? */ + C11 = 8, + /* Valid in a C11/C++11 identifier, but not as the first character? */ + N11 = 16, /* NFC representation is not valid in an identifier? */ - CID = 8, + CID = 32, /* Might be valid NFC form? */ - NFC = 16, + NFC = 64, /* Might be valid NFKC form? */ - NKC = 32, + NKC = 128, /* Certain preceding characters might make it not valid NFC/NKFC form? */ - CTX = 64 + CTX = 256 }; -static const struct { +struct ucnrange { /* Bitmap of flags above. */ - unsigned char flags; + unsigned short flags; /* Combining class of the character. */ unsigned char combine; /* Last character in the range described by this entry. */ - unsigned short end; -} ucnranges[] = { -#include "ucnid.h" + unsigned int end; }; +#include "ucnid.h" /* Returns 1 if C is valid in an identifier, 2 if C is valid except at the start of an identifier, and 0 if C is not valid in an @@ -864,8 +867,9 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c, struct normalize_state *nst) { int mn, mx, md; + unsigned short valid_flags, invalid_start_flags; - if (c > 0xFFFF) + if (c > 0x10FFFF) return 0; mn = 0; @@ -881,15 +885,25 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c, /* When -pedantic, we require the character to have been listed by the standard for the current language. Otherwise, we accept the - union of the acceptable sets for C++98 and C99. */ - if (! (ucnranges[mn].flags & (C99 | CXX))) + union of the acceptable sets for all supported language versions. 
*/ + valid_flags = C99 | CXX | C11; + if (CPP_PEDANTIC (pfile)) + { + if (CPP_OPTION (pfile, c11_identifiers)) + valid_flags = C11; + else if (CPP_OPTION (pfile, c99)) + valid_flags = C99; + else if (CPP_OPTION (pfile, cplusplus)) + valid_flags = CXX; + } + if (! (ucnranges[mn].flags & valid_flags)) return 0; - - if (CPP_PEDANTIC (pfile) - && ((CPP_OPTION (pfile, c99) && !(ucnranges[mn].flags & C99)) - || (CPP_OPTION (pfile, cplusplus) - && !(ucnranges[mn].flags & CXX)))) - return 0; + if (CPP_OPTION (pfile, c11_identifiers)) + invalid_start_flags = N11; + else if (CPP_OPTION (pfile, c99)) + invalid_start_flags = N99; + else + invalid_start_flags = 0; /* Update NST. */ if (ucnranges[mn].combine != 0 && ucnranges[mn].combine < nst->prev_class) @@ -899,17 +913,6 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c, bool safe; cppchar_t p = nst->previous; - /* Easy cases from Bengali, Oriya, Tamil, Jannada, and Malayalam. */ - if (c == 0x09BE) - safe = p != 0x09C7; /* Use 09CB instead of 09C7 09BE. */ - else if (c == 0x0B3E) - safe = p != 0x0B47; /* Use 0B4B instead of 0B47 0B3E. */ - else if (c == 0x0BBE) - safe = p != 0x0BC6 && p != 0x0BC7; /* Use 0BCA/0BCB instead. */ - else if (c == 0x0CC2) - safe = p != 0x0CC6; /* Use 0CCA instead of 0CC6 0CC2. */ - else if (c == 0x0D3E) - safe = p != 0x0D46 && p != 0x0D47; /* Use 0D4A/0D4B instead. */ /* For Hangul, characters in the range AC00-D7A3 are NFC/NFKC, and are combined algorithmically from a sequence of the form 1100-1112 1161-1175 11A8-11C2 @@ -917,20 +920,19 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c, really a valid character). Unfortunately, C99 allows (only) the NFC form, but C++ allows only the combining characters. 
*/ - else if (c >= 0x1161 && c <= 0x1175) + if (c >= 0x1161 && c <= 0x1175) safe = p < 0x1100 || p > 0x1112; else if (c >= 0x11A8 && c <= 0x11C2) safe = (p < 0xAC00 || p > 0xD7A3 || (p - 0xAC00) % 28 != 0); else + safe = check_nfc (pfile, c, p); + if (!safe) { - /* Uh-oh, someone updated ucnid.h without updating this code. */ - cpp_error (pfile, CPP_DL_ICE, "Character %x might not be NFKC", c); - safe = true; + if ((c >= 0x1161 && c <= 0x1175) || (c >= 0x11A8 && c <= 0x11C2)) + nst->level = MAX (nst->level, normalized_identifier_C); + else + nst->level = normalized_none; } - if (!safe && c < 0x1161) - nst->level = normalized_none; - else if (!safe) - nst->level = MAX (nst->level, normalized_identifier_C); } else if (ucnranges[mn].flags & NKC) ; @@ -940,11 +942,13 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c, nst->level = MAX (nst->level, normalized_identifier_C); else nst->level = normalized_none; - nst->previous = c; + if (ucnranges[mn].combine == 0) + nst->previous = c; nst->prev_class = ucnranges[mn].combine; - /* In C99, UCN digits may not begin identifiers. */ - if (CPP_OPTION (pfile, c99) && (ucnranges[mn].flags & DIG)) + /* In C99, UCN digits may not begin identifiers. In C11 and C++11, + UCN combining characters may not begin identifiers. */ + if (ucnranges[mn].flags & invalid_start_flags) return 2; return 1; @@ -1054,7 +1058,7 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr, CPP_OPTION (pfile, warn_dollars) = 0; cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number"); } - NORMALIZE_STATE_UPDATE_IDNUM (nst); + NORMALIZE_STATE_UPDATE_IDNUM (nst, result); } else if (identifier_pos) { |