aboutsummaryrefslogtreecommitdiff
path: root/libcpp/charset.c
diff options
context:
space:
mode:
Diffstat (limited to 'libcpp/charset.c')
-rw-r--r--libcpp/charset.c88
1 files changed, 46 insertions, 42 deletions
diff --git a/libcpp/charset.c b/libcpp/charset.c
index ae56c5a..c48e64a 100644
--- a/libcpp/charset.c
+++ b/libcpp/charset.c
@@ -828,29 +828,32 @@ enum {
/* Valid in a C99 identifier? */
C99 = 1,
/* Valid in a C99 identifier, but not as the first character? */
- DIG = 2,
+ N99 = 2,
/* Valid in a C++ identifier? */
CXX = 4,
+ /* Valid in a C11/C++11 identifier? */
+ C11 = 8,
+ /* Valid in a C11/C++11 identifier, but not as the first character? */
+ N11 = 16,
/* NFC representation is not valid in an identifier? */
- CID = 8,
+ CID = 32,
/* Might be valid NFC form? */
- NFC = 16,
+ NFC = 64,
/* Might be valid NFKC form? */
- NKC = 32,
+ NKC = 128,
/* Certain preceding characters might make it not valid NFC/NKFC form? */
- CTX = 64
+ CTX = 256
};
-static const struct {
+struct ucnrange {
/* Bitmap of flags above. */
- unsigned char flags;
+ unsigned short flags;
/* Combining class of the character. */
unsigned char combine;
/* Last character in the range described by this entry. */
- unsigned short end;
-} ucnranges[] = {
-#include "ucnid.h"
+ unsigned int end;
};
+#include "ucnid.h"
/* Returns 1 if C is valid in an identifier, 2 if C is valid except at
the start of an identifier, and 0 if C is not valid in an
@@ -864,8 +867,9 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c,
struct normalize_state *nst)
{
int mn, mx, md;
+ unsigned short valid_flags, invalid_start_flags;
- if (c > 0xFFFF)
+ if (c > 0x10FFFF)
return 0;
mn = 0;
@@ -881,15 +885,25 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c,
/* When -pedantic, we require the character to have been listed by
the standard for the current language. Otherwise, we accept the
- union of the acceptable sets for C++98 and C99. */
- if (! (ucnranges[mn].flags & (C99 | CXX)))
+ union of the acceptable sets for all supported language versions. */
+ valid_flags = C99 | CXX | C11;
+ if (CPP_PEDANTIC (pfile))
+ {
+ if (CPP_OPTION (pfile, c11_identifiers))
+ valid_flags = C11;
+ else if (CPP_OPTION (pfile, c99))
+ valid_flags = C99;
+ else if (CPP_OPTION (pfile, cplusplus))
+ valid_flags = CXX;
+ }
+ if (! (ucnranges[mn].flags & valid_flags))
return 0;
-
- if (CPP_PEDANTIC (pfile)
- && ((CPP_OPTION (pfile, c99) && !(ucnranges[mn].flags & C99))
- || (CPP_OPTION (pfile, cplusplus)
- && !(ucnranges[mn].flags & CXX))))
- return 0;
+ if (CPP_OPTION (pfile, c11_identifiers))
+ invalid_start_flags = N11;
+ else if (CPP_OPTION (pfile, c99))
+ invalid_start_flags = N99;
+ else
+ invalid_start_flags = 0;
/* Update NST. */
if (ucnranges[mn].combine != 0 && ucnranges[mn].combine < nst->prev_class)
@@ -899,17 +913,6 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c,
bool safe;
cppchar_t p = nst->previous;
- /* Easy cases from Bengali, Oriya, Tamil, Jannada, and Malayalam. */
- if (c == 0x09BE)
- safe = p != 0x09C7; /* Use 09CB instead of 09C7 09BE. */
- else if (c == 0x0B3E)
- safe = p != 0x0B47; /* Use 0B4B instead of 0B47 0B3E. */
- else if (c == 0x0BBE)
- safe = p != 0x0BC6 && p != 0x0BC7; /* Use 0BCA/0BCB instead. */
- else if (c == 0x0CC2)
- safe = p != 0x0CC6; /* Use 0CCA instead of 0CC6 0CC2. */
- else if (c == 0x0D3E)
- safe = p != 0x0D46 && p != 0x0D47; /* Use 0D4A/0D4B instead. */
/* For Hangul, characters in the range AC00-D7A3 are NFC/NFKC,
and are combined algorithmically from a sequence of the form
1100-1112 1161-1175 11A8-11C2
@@ -917,20 +920,19 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c,
really a valid character).
Unfortunately, C99 allows (only) the NFC form, but C++ allows
only the combining characters. */
- else if (c >= 0x1161 && c <= 0x1175)
+ if (c >= 0x1161 && c <= 0x1175)
safe = p < 0x1100 || p > 0x1112;
else if (c >= 0x11A8 && c <= 0x11C2)
safe = (p < 0xAC00 || p > 0xD7A3 || (p - 0xAC00) % 28 != 0);
else
+ safe = check_nfc (pfile, c, p);
+ if (!safe)
{
- /* Uh-oh, someone updated ucnid.h without updating this code. */
- cpp_error (pfile, CPP_DL_ICE, "Character %x might not be NFKC", c);
- safe = true;
+ if ((c >= 0x1161 && c <= 0x1175) || (c >= 0x11A8 && c <= 0x11C2))
+ nst->level = MAX (nst->level, normalized_identifier_C);
+ else
+ nst->level = normalized_none;
}
- if (!safe && c < 0x1161)
- nst->level = normalized_none;
- else if (!safe)
- nst->level = MAX (nst->level, normalized_identifier_C);
}
else if (ucnranges[mn].flags & NKC)
;
@@ -940,11 +942,13 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c,
nst->level = MAX (nst->level, normalized_identifier_C);
else
nst->level = normalized_none;
- nst->previous = c;
+ if (ucnranges[mn].combine == 0)
+ nst->previous = c;
nst->prev_class = ucnranges[mn].combine;
- /* In C99, UCN digits may not begin identifiers. */
- if (CPP_OPTION (pfile, c99) && (ucnranges[mn].flags & DIG))
+ /* In C99, UCN digits may not begin identifiers. In C11 and C++11,
+ UCN combining characters may not begin identifiers. */
+ if (ucnranges[mn].flags & invalid_start_flags)
return 2;
return 1;
@@ -1054,7 +1058,7 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr,
CPP_OPTION (pfile, warn_dollars) = 0;
cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
}
- NORMALIZE_STATE_UPDATE_IDNUM (nst);
+ NORMALIZE_STATE_UPDATE_IDNUM (nst, result);
}
else if (identifier_pos)
{