aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorNeil Booth <neil@daikokuya.co.uk>2003-04-19 11:59:44 +0000
committerNeil Booth <neil@gcc.gnu.org>2003-04-19 11:59:44 +0000
commitbced6edfc6c487a9cf671f08417be163904d8def (patch)
treed205dc47adef09a8364abbb312879df8c18a5aeb /gcc
parentf1d1b741e82757450fb9daf1bad2ea85c792ad4e (diff)
downloadgcc-bced6edfc6c487a9cf671f08417be163904d8def.zip
gcc-bced6edfc6c487a9cf671f08417be163904d8def.tar.gz
gcc-bced6edfc6c487a9cf671f08417be163904d8def.tar.bz2
cpphash.h (struct cpp_reader): New member warned_dollar.
* cpphash.h (struct cpp_reader): New member warned_dollar. * cpplex.c (continues_identifier_p): New function. (parse_identifier, parse_number, parse_string): Rename lex_identifer, lex_number and lex_string, and simplify. (parse_slow, unescaped_terminator_p): Die. (_cpp_lex_direct): Update. From-SVN: r65816
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog9
-rw-r--r--gcc/cpphash.h4
-rw-r--r--gcc/cpplex.c315
3 files changed, 114 insertions, 214 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index c95cfd8..4a987d56 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,12 @@
+2003-04-19 Neil Booth <neil@daikokuya.co.uk>
+
+ * cpphash.h (struct cpp_reader): New member warned_dollar.
+ * cpplex.c (continues_identifier_p): New function.
+ (parse_identifier, parse_number, parse_string): Rename lex_identifer,
+ lex_number and lex_string, and simplify.
+ (parse_slow, unescaped_terminator_p): Die.
+ (_cpp_lex_direct): Update.
+
2003-04-19 Richard Kenner <kenner@vlsi1.ultra.nyu.edu>
* calls.c (expand_call): Provide init for old_stack_pointer_delta.
diff --git a/gcc/cpphash.h b/gcc/cpphash.h
index 4b5cd49..a551aad 100644
--- a/gcc/cpphash.h
+++ b/gcc/cpphash.h
@@ -381,6 +381,10 @@ struct cpp_reader
cpp_token avoid_paste;
cpp_token eof;
+ /* True if we have already warned about dollars in identifiers or
+ numbers for this buffer. */
+ bool warned_dollar;
+
/* Opaque handle to the dependencies of mkdeps.c. */
struct deps *deps;
diff --git a/gcc/cpplex.c b/gcc/cpplex.c
index ce9a8d4..ad68b73 100644
--- a/gcc/cpplex.c
+++ b/gcc/cpplex.c
@@ -62,12 +62,10 @@ static cppchar_t get_effective_char PARAMS ((cpp_reader *));
static int skip_line_comment PARAMS ((cpp_reader *));
static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
-static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
-static uchar *parse_slow PARAMS ((cpp_reader *, const uchar *, int,
- unsigned int *));
-static void parse_number PARAMS ((cpp_reader *, cpp_string *, int));
-static int unescaped_terminator_p PARAMS ((cpp_reader *, const uchar *));
-static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
+static cpp_hashnode *lex_identifier PARAMS ((cpp_reader *));
+static void lex_number PARAMS ((cpp_reader *, cpp_string *));
+static bool continues_identifier_p PARAMS ((cpp_reader *));
+static void lex_string PARAMS ((cpp_reader *, cpp_token *));
static void save_comment PARAMS ((cpp_reader *, cpp_token *, const uchar *,
cppchar_t));
static int name_p PARAMS ((cpp_reader *, const cpp_string *));
@@ -377,46 +375,50 @@ name_p (pfile, string)
return 1;
}
-/* Parse an identifier, skipping embedded backslash-newlines. This is
- a critical inner loop. The common case is an identifier which has
- not been split by backslash-newline, does not contain a dollar
- sign, and has already been scanned (roughly 10:1 ratio of
- seen:unseen identifiers in normal code; the distribution is
- Poisson-like). Second most common case is a new identifier, not
- split and no dollar sign. The other possibilities are rare and
- have been relegated to parse_slow. */
+/* Returns TRUE if the sequence starting at buffer->cur is invalid in
+ an identifier. */
+static bool
+continues_identifier_p (pfile)
+ cpp_reader *pfile;
+{
+ if (*pfile->buffer->cur != '$')
+ return false;
+
+ if (CPP_PEDANTIC (pfile) && !pfile->state.skipping && !pfile->warned_dollar)
+ {
+ pfile->warned_dollar = true;
+ cpp_error (pfile, DL_PEDWARN, "'$' in identifier or number");
+ }
+ pfile->buffer->cur++;
+
+ return true;
+}
+
+/* Lex an identifier starting at BUFFER->CUR - 1. */
static cpp_hashnode *
-parse_identifier (pfile)
+lex_identifier (pfile)
cpp_reader *pfile;
{
cpp_hashnode *result;
const uchar *cur, *base;
- /* Fast-path loop. Skim over a normal identifier.
- N.B. ISIDNUM does not include $. */
- cur = pfile->buffer->cur;
- while (ISIDNUM (*cur))
- cur++;
-
- /* Check for slow-path cases. */
- if (*cur == '$')
+ base = pfile->buffer->cur - 1;
+ do
{
- unsigned int len;
+ cur = pfile->buffer->cur;
+
+ /* N.B. ISIDNUM does not include $. */
+ while (ISIDNUM (*cur))
+ cur++;
- base = parse_slow (pfile, cur, 0, &len);
- result = (cpp_hashnode *)
- ht_lookup (pfile->hash_table, base, len, HT_ALLOCED);
- }
- else
- {
- base = pfile->buffer->cur - 1;
pfile->buffer->cur = cur;
- result = (cpp_hashnode *)
- ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
}
+ while (continues_identifier_p (pfile));
- /* Rarely, identifiers require diagnostics when lexed.
- XXX Has to be forced out of the fast path. */
+ result = (cpp_hashnode *)
+ ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
+
+ /* Rarely, identifiers require diagnostics when lexed. */
if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
&& !pfile->state.skipping, 0))
{
@@ -436,188 +438,66 @@ parse_identifier (pfile)
return result;
}
-/* Slow path. This handles numbers and identifiers which have been
- split, or contain dollar signs. The part of the token from
- PFILE->buffer->cur-1 to CUR has already been scanned. NUMBER_P is
- 1 if it's a number, and 2 if it has a leading period. Returns a
- pointer to the token's NUL-terminated spelling in permanent
- storage, and sets PLEN to its length. */
-static uchar *
-parse_slow (pfile, cur, number_p, plen)
- cpp_reader *pfile;
- const uchar *cur;
- int number_p;
- unsigned int *plen;
-{
- cpp_buffer *buffer = pfile->buffer;
- const uchar *base = buffer->cur - 1;
- struct obstack *stack = &pfile->hash_table->stack;
- unsigned int c, prevc, saw_dollar = 0;
-
- /* Place any leading period. */
- if (number_p == 2)
- obstack_1grow (stack, '.');
-
- /* Copy the part of the token which is known to be okay. */
- obstack_grow (stack, base, cur - base);
-
- /* Now process the part which isn't. We are looking at one of
- '$', '\\', or '?' on entry to this loop. */
- prevc = cur[-1];
- c = *cur++;
- buffer->cur = cur;
- for (;;)
- {
- /* Potential escaped newline? */
- buffer->backup_to = buffer->cur - 1;
-
- if (!is_idchar (c))
- {
- if (!number_p)
- break;
- if (c != '.' && !VALID_SIGN (c, prevc))
- break;
- }
-
- /* Handle normal identifier characters in this loop. */
- do
- {
- prevc = c;
- obstack_1grow (stack, c);
-
- if (c == '$')
- saw_dollar++;
-
- c = *buffer->cur++;
- }
- while (is_idchar (c));
- }
-
- /* Step back over the unwanted char. */
- BACKUP ();
-
- /* $ is not an identifier character in the standard, but is commonly
- accepted as an extension. Don't warn about it in skipped
- conditional blocks. */
- if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
- cpp_error (pfile, DL_PEDWARN, "'$' character(s) in identifier or number");
-
- /* Identifiers and numbers are null-terminated. */
- *plen = obstack_object_size (stack);
- obstack_1grow (stack, '\0');
- return obstack_finish (stack);
-}
-
-/* Parse a number, beginning with character C, skipping embedded
- backslash-newlines. LEADING_PERIOD is nonzero if there was a "."
- before C. Place the result in NUMBER. */
+/* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
static void
-parse_number (pfile, number, leading_period)
+lex_number (pfile, number)
cpp_reader *pfile;
cpp_string *number;
- int leading_period;
{
const uchar *cur;
+ const uchar *base;
+ uchar *dest;
- /* Fast-path loop. Skim over a normal number.
- N.B. ISIDNUM does not include $. */
- cur = pfile->buffer->cur;
- while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
- cur++;
-
- /* Check for slow-path cases. */
- if (*cur == '$')
- number->text = parse_slow (pfile, cur, 1 + leading_period, &number->len);
- else
+ base = pfile->buffer->cur - 1;
+ do
{
- const uchar *base = pfile->buffer->cur - 1;
- uchar *dest;
+ cur = pfile->buffer->cur;
- number->len = cur - base + leading_period;
- dest = _cpp_unaligned_alloc (pfile, number->len + 1);
- dest[number->len] = '\0';
- number->text = dest;
+ /* N.B. ISIDNUM does not include $. */
+ while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
+ cur++;
- if (leading_period)
- *dest++ = '.';
- memcpy (dest, base, cur - base);
pfile->buffer->cur = cur;
}
-}
-
-/* Subroutine of parse_string. */
-static int
-unescaped_terminator_p (pfile, dest)
- cpp_reader *pfile;
- const unsigned char *dest;
-{
- const unsigned char *start, *temp;
-
- /* In #include-style directives, terminators are not escapable. */
- if (pfile->state.angled_headers)
- return 1;
-
- start = BUFF_FRONT (pfile->u_buff);
+ while (continues_identifier_p (pfile));
- /* An odd number of consecutive backslashes represents an escaped
- terminator. */
- for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
- ;
-
- return ((dest - temp) & 1) == 0;
+ number->len = cur - base;
+ dest = _cpp_unaligned_alloc (pfile, number->len + 1);
+ memcpy (dest, base, number->len);
+ dest[number->len] = '\0';
+ number->text = dest;
}
-/* Parses a string, character constant, or angle-bracketed header file
- name. Handles embedded trigraphs and escaped newlines. The stored
- string is guaranteed NUL-terminated, but it is not guaranteed that
- this is the first NUL since embedded NULs are preserved.
-
- When this function returns, buffer->cur points to the next
- character to be processed. */
+/* Lexes a string, character constant, or angle-bracketed header file
+ name. The stored string is guaranteed NUL-terminated, but it is
+ not guaranteed that this is the first NUL since embedded NULs are
+ preserved. */
static void
-parse_string (pfile, token, terminator)
+lex_string (pfile, token)
cpp_reader *pfile;
cpp_token *token;
- cppchar_t terminator;
{
cpp_buffer *buffer = pfile->buffer;
- unsigned char *dest, *limit;
- cppchar_t c;
bool warned_nulls = false;
+ const uchar *base;
+ uchar *dest;
+ cppchar_t terminator;
- dest = BUFF_FRONT (pfile->u_buff);
- limit = BUFF_LIMIT (pfile->u_buff);
+ base = buffer->cur;
+ terminator = base[-1];
+ if (terminator == '<')
+ terminator = '>';
for (;;)
{
- /* We need room for another char, possibly the terminating NUL. */
- if ((size_t) (limit - dest) < 1)
- {
- size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
- _cpp_extend_buff (pfile, &pfile->u_buff, 2);
- dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
- limit = BUFF_LIMIT (pfile->u_buff);
- }
-
- c = *buffer->cur++;
+ cppchar_t c = *buffer->cur++;
- if (c == terminator)
- {
- if (unescaped_terminator_p (pfile, dest))
- break;
- }
- else if (c == '\n')
- {
- /* No string literal may extend over multiple lines. In
- assembly language, suppress the error except for <>
- includes. This is a kludge around not knowing where
- comments are. */
- if (CPP_OPTION (pfile, lang) != CLK_ASM || terminator == '>')
- cpp_error (pfile, DL_ERROR, "missing terminating %c character",
- (int) terminator);
- buffer->cur--;
- break;
- }
+ /* In #include-style directives, terminators are not escapable.
+ \n can follow the '\\' if the file's last byte is '\\'. */
+ if (c == '\\' && !pfile->state.angled_headers && *buffer->cur != '\n')
+ buffer->cur++;
+ else if (c == terminator || c == '\n')
+ break;
else if (c == '\0')
{
if (!warned_nulls)
@@ -627,14 +507,25 @@ parse_string (pfile, token, terminator)
"null character(s) preserved in literal");
}
}
- *dest++ = c;
}
- *dest = '\0';
+ token->val.str.len = buffer->cur - base - 1;
+ dest = _cpp_unaligned_alloc (pfile, token->val.str.len + 1);
+ memcpy (dest, base, token->val.str.len);
+ dest[token->val.str.len] = '\0';
+ token->val.str.text = dest;
- token->val.str.text = BUFF_FRONT (pfile->u_buff);
- token->val.str.len = dest - BUFF_FRONT (pfile->u_buff);
- BUFF_FRONT (pfile->u_buff) = dest + 1;
+ if (buffer->cur[-1] == '\n')
+ {
+ /* No string literal may extend over multiple lines. In
+ assembly language, suppress the error except for <>
+ includes. This is a kludge around not knowing where
+ comments are. */
+ if (CPP_OPTION (pfile, lang) != CLK_ASM || terminator == '>')
+ cpp_error (pfile, DL_ERROR, "missing terminating %c character",
+ (int) terminator);
+ buffer->cur--;
+ }
}
/* The stored comment includes the comment start and any terminator. */
@@ -916,23 +807,18 @@ _cpp_lex_direct (pfile)
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
result->type = CPP_NUMBER;
- parse_number (pfile, &result->val.str, 0);
+ lex_number (pfile, &result->val.str);
break;
case 'L':
/* 'L' may introduce wide characters or strings. */
- {
- const unsigned char *pos = buffer->cur;
-
- c = get_effective_char (pfile);
- if (c == '\'' || c == '"')
- {
- result->type = (c == '"' ? CPP_WSTRING: CPP_WCHAR);
- parse_string (pfile, result, c);
- break;
- }
- buffer->cur = pos;
- }
+ if (*buffer->cur == '\'' || *buffer->cur == '"')
+ {
+ result->type = (*buffer->cur == '"' ? CPP_WSTRING: CPP_WCHAR);
+ buffer->cur++;
+ lex_string (pfile, result);
+ break;
+ }
/* Fall through. */
start_ident:
@@ -948,7 +834,7 @@ _cpp_lex_direct (pfile)
case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
case 'Y': case 'Z':
result->type = CPP_NAME;
- result->val.node = parse_identifier (pfile);
+ result->val.node = lex_identifier (pfile);
/* Convert named operators to their proper types. */
if (result->val.node->flags & NODE_OPERATOR)
@@ -961,7 +847,7 @@ _cpp_lex_direct (pfile)
case '\'':
case '"':
result->type = c == '"' ? CPP_STRING: CPP_CHAR;
- parse_string (pfile, result, c);
+ lex_string (pfile, result);
break;
case '/':
@@ -1018,7 +904,7 @@ _cpp_lex_direct (pfile)
if (pfile->state.angled_headers)
{
result->type = CPP_HEADER_NAME;
- parse_string (pfile, result, '>');
+ lex_string (pfile, result);
break;
}
@@ -1108,8 +994,9 @@ _cpp_lex_direct (pfile)
/* All known character sets have 0...9 contiguous. */
else if (ISDIGIT (c))
{
+ buffer->cur--;
result->type = CPP_NUMBER;
- parse_number (pfile, &result->val.str, 1);
+ lex_number (pfile, &result->val.str);
}
else if (c == '*' && CPP_OPTION (pfile, cplusplus))
result->type = CPP_DOT_STAR;