aboutsummaryrefslogtreecommitdiff
path: root/gcc/cpplex.c
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/cpplex.c')
-rw-r--r--gcc/cpplex.c289
1 files changed, 1 insertions, 288 deletions
diff --git a/gcc/cpplex.c b/gcc/cpplex.c
index c536c76..edb765d 100644
--- a/gcc/cpplex.c
+++ b/gcc/cpplex.c
@@ -64,10 +64,8 @@ static void create_literal (cpp_reader *, cpp_token *, const uchar *,
unsigned int, enum cpp_ttype);
static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
static int name_p (cpp_reader *, const cpp_string *);
-static cppchar_t maybe_read_ucn (cpp_reader *, const uchar **);
static tokenrun *next_tokenrun (tokenrun *);
-static unsigned int hex_digit_value (unsigned int);
static _cpp_buff *new_buff (size_t);
@@ -397,7 +395,7 @@ forms_identifier_p (cpp_reader *pfile, int first)
&& (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
{
buffer->cur += 2;
- if (_cpp_valid_ucn (pfile, &buffer->cur, 1 + !first))
+ if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first))
return true;
buffer->cur -= 2;
}
@@ -1316,291 +1314,6 @@ cpp_output_line (cpp_reader *pfile, FILE *fp)
putc ('\n', fp);
}
-/* Returns the value of a hexadecimal digit. */
-static unsigned int
-hex_digit_value (unsigned int c)
-{
- if (hex_p (c))
- return hex_value (c);
- else
- abort ();
-}
-
-/* Read a possible universal character name starting at *PSTR. */
-static cppchar_t
-maybe_read_ucn (cpp_reader *pfile, const uchar **pstr)
-{
- cppchar_t result, c = (*pstr)[-1];
-
- result = _cpp_valid_ucn (pfile, pstr, false);
- if (result)
- {
- if (CPP_WTRADITIONAL (pfile))
- cpp_error (pfile, DL_WARNING,
- "the meaning of '\\%c' is different in traditional C",
- (int) c);
-
- if (CPP_OPTION (pfile, EBCDIC))
- {
- cpp_error (pfile, DL_ERROR,
- "universal character with an EBCDIC target");
- result = 0x3f; /* EBCDIC invalid character */
- }
- }
-
- return result;
-}
-
-/* Returns the value of an escape sequence, truncated to the correct
- target precision. PSTR points to the input pointer, which is just
- after the backslash. LIMIT is how much text we have. WIDE is true
- if the escape sequence is part of a wide character constant or
- string literal. Handles all relevant diagnostics. */
-cppchar_t
-cpp_parse_escape (cpp_reader *pfile, const unsigned char **pstr,
- const unsigned char *limit, int wide)
-{
- /* Values of \a \b \e \f \n \r \t \v respectively. */
- static const uchar ascii[] = { 7, 8, 27, 12, 10, 13, 9, 11 };
- static const uchar ebcdic[] = { 47, 22, 39, 12, 21, 13, 5, 11 };
-
- int unknown = 0;
- const unsigned char *str = *pstr, *charconsts;
- cppchar_t c, ucn, mask;
- unsigned int width;
-
- if (CPP_OPTION (pfile, EBCDIC))
- charconsts = ebcdic;
- else
- charconsts = ascii;
-
- if (wide)
- width = CPP_OPTION (pfile, wchar_precision);
- else
- width = CPP_OPTION (pfile, char_precision);
- if (width < BITS_PER_CPPCHAR_T)
- mask = ((cppchar_t) 1 << width) - 1;
- else
- mask = ~0;
-
- c = *str++;
- switch (c)
- {
- case '\\': case '\'': case '"': case '?': break;
- case 'b': c = charconsts[1]; break;
- case 'f': c = charconsts[3]; break;
- case 'n': c = charconsts[4]; break;
- case 'r': c = charconsts[5]; break;
- case 't': c = charconsts[6]; break;
- case 'v': c = charconsts[7]; break;
-
- case '(': case '{': case '[': case '%':
- /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
- '\%' is used to prevent SCCS from getting confused. */
- unknown = CPP_PEDANTIC (pfile);
- break;
-
- case 'a':
- if (CPP_WTRADITIONAL (pfile))
- cpp_error (pfile, DL_WARNING,
- "the meaning of '\\a' is different in traditional C");
- c = charconsts[0];
- break;
-
- case 'e': case 'E':
- if (CPP_PEDANTIC (pfile))
- cpp_error (pfile, DL_PEDWARN,
- "non-ISO-standard escape sequence, '\\%c'", (int) c);
- c = charconsts[2];
- break;
-
- case 'u': case 'U':
- ucn = maybe_read_ucn (pfile, &str);
- if (ucn)
- c = ucn;
- else
- unknown = true;
- break;
-
- case 'x':
- if (CPP_WTRADITIONAL (pfile))
- cpp_error (pfile, DL_WARNING,
- "the meaning of '\\x' is different in traditional C");
-
- {
- cppchar_t i = 0, overflow = 0;
- int digits_found = 0;
-
- while (str < limit)
- {
- c = *str;
- if (! ISXDIGIT (c))
- break;
- str++;
- overflow |= i ^ (i << 4 >> 4);
- i = (i << 4) + hex_digit_value (c);
- digits_found = 1;
- }
-
- if (!digits_found)
- cpp_error (pfile, DL_ERROR,
- "\\x used with no following hex digits");
-
- if (overflow | (i != (i & mask)))
- {
- cpp_error (pfile, DL_PEDWARN,
- "hex escape sequence out of range");
- i &= mask;
- }
- c = i;
- }
- break;
-
- case '0': case '1': case '2': case '3':
- case '4': case '5': case '6': case '7':
- {
- size_t count = 0;
- cppchar_t i = c - '0';
-
- while (str < limit && ++count < 3)
- {
- c = *str;
- if (c < '0' || c > '7')
- break;
- str++;
- i = (i << 3) + c - '0';
- }
-
- if (i != (i & mask))
- {
- cpp_error (pfile, DL_PEDWARN,
- "octal escape sequence out of range");
- i &= mask;
- }
- c = i;
- }
- break;
-
- default:
- unknown = 1;
- break;
- }
-
- if (unknown)
- {
- if (ISGRAPH (c))
- cpp_error (pfile, DL_PEDWARN,
- "unknown escape sequence '\\%c'", (int) c);
- else
- cpp_error (pfile, DL_PEDWARN,
- "unknown escape sequence: '\\%03o'", (int) c);
- }
-
- if (c > mask)
- {
- cpp_error (pfile, DL_PEDWARN,
- "escape sequence out of range for its type");
- c &= mask;
- }
-
- *pstr = str;
- return c;
-}
-
-/* Interpret a (possibly wide) character constant in TOKEN.
- WARN_MULTI warns about multi-character charconsts. PCHARS_SEEN
- points to a variable that is filled in with the number of
- characters seen, and UNSIGNEDP to a variable that indicates whether
- the result has signed type. */
-cppchar_t
-cpp_interpret_charconst (cpp_reader *pfile, const cpp_token *token,
- unsigned int *pchars_seen, int *unsignedp)
-{
- const unsigned char *str, *limit;
- unsigned int chars_seen = 0;
- size_t width, max_chars;
- cppchar_t c, mask, result = 0;
- bool unsigned_p;
-
- str = token->val.str.text + 1 + (token->type == CPP_WCHAR);
- limit = token->val.str.text + token->val.str.len - 1;
-
- if (token->type == CPP_CHAR)
- {
- width = CPP_OPTION (pfile, char_precision);
- max_chars = CPP_OPTION (pfile, int_precision) / width;
- unsigned_p = CPP_OPTION (pfile, unsigned_char);
- }
- else
- {
- width = CPP_OPTION (pfile, wchar_precision);
- max_chars = 1;
- unsigned_p = CPP_OPTION (pfile, unsigned_wchar);
- }
-
- if (width < BITS_PER_CPPCHAR_T)
- mask = ((cppchar_t) 1 << width) - 1;
- else
- mask = ~0;
-
- while (str < limit)
- {
- c = *str++;
-
- if (c == '\\')
- c = cpp_parse_escape (pfile, &str, limit, token->type == CPP_WCHAR);
-
-#ifdef MAP_CHARACTER
- if (ISPRINT (c))
- c = MAP_CHARACTER (c);
-#endif
-
- chars_seen++;
-
- /* Truncate the character, scale the result and merge the two. */
- c &= mask;
- if (width < BITS_PER_CPPCHAR_T)
- result = (result << width) | c;
- else
- result = c;
- }
-
- if (chars_seen == 0)
- cpp_error (pfile, DL_ERROR, "empty character constant");
- else if (chars_seen > 1)
- {
- /* Multichar charconsts are of type int and therefore signed. */
- unsigned_p = 0;
-
- if (chars_seen > max_chars)
- {
- chars_seen = max_chars;
- cpp_error (pfile, DL_WARNING,
- "character constant too long for its type");
- }
- else if (CPP_OPTION (pfile, warn_multichar))
- cpp_error (pfile, DL_WARNING, "multi-character character constant");
- }
-
- /* Sign-extend or truncate the constant to cppchar_t. The value is
- in WIDTH bits, but for multi-char charconsts it's value is the
- full target type's width. */
- if (chars_seen > 1)
- width *= max_chars;
- if (width < BITS_PER_CPPCHAR_T)
- {
- mask = ((cppchar_t) 1 << width) - 1;
- if (unsigned_p || !(result & (1 << (width - 1))))
- result &= mask;
- else
- result |= ~mask;
- }
-
- *pchars_seen = chars_seen;
- *unsignedp = unsigned_p;
- return result;
-}
-
/* Memory buffers. Changing these three constants can have a dramatic
effect on performance. The values here are reasonable defaults,
but might be tuned. If you adjust them, be sure to test across a