diff options
author | Zack Weinberg <zack@gcc.gnu.org> | 2000-04-25 19:32:36 +0000 |
---|---|---|
committer | Zack Weinberg <zack@gcc.gnu.org> | 2000-04-25 19:32:36 +0000 |
commit | c5a047348d810987513ce54c6533c1314ad19593 (patch) | |
tree | 5a047961b22821bc1f9f0aa156612fd5c002d95c /gcc | |
parent | e0075d846d4a4bbcb3a5114bdc8f4f130bf819b1 (diff) | |
download | gcc-c5a047348d810987513ce54c6533c1314ad19593.zip gcc-c5a047348d810987513ce54c6533c1314ad19593.tar.gz gcc-c5a047348d810987513ce54c6533c1314ad19593.tar.bz2 |
[multiple changes]
2000-04-25 Zack Weinberg <zack@wolery.cumb.org>
* cpplib.h (struct cpp_buffer): Add 'mapped' flag; fix
commentary.
2000-04-25 Neil Booth <NeilB@earthling.net>
Restore previous patch, plus the following fixes:
* cpphash.c (_cpp_create_definition): Test PREV_WHITESPACE in
flags, not CPP_OPEN_PAREN.
* cpplex.c (expand_token_space, init_token_list,
cpp_free_token_list): Put the dummy token at list->tokens[-1].
(_cpp_lex_line, _cpp_lex_file): token list is 0-based.
From-SVN: r33419
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 15 | ||||
-rw-r--r-- | gcc/cpphash.c | 13 | ||||
-rw-r--r-- | gcc/cpphash.h | 1 | ||||
-rw-r--r-- | gcc/cpplex.c | 1385 | ||||
-rw-r--r-- | gcc/cpplib.c | 23 | ||||
-rw-r--r-- | gcc/cpplib.h | 129 |
6 files changed, 1492 insertions, 74 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 03876e4..4547607 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,18 @@ +2000-04-25 Zack Weinberg <zack@wolery.cumb.org> + + * cpplib.h (struct cpp_buffer): Add 'mapped' flag; fix + commentary. + +2000-04-25 Neil Booth <NeilB@earthling.net> + + Restore previous patch, plus the following fixes: + + * cpphash.c (_cpp_create_definition): Test PREV_WHITESPACE in + flags, not CPP_OPEN_PAREN. + * cpplex.c (expand_token_space, init_token_list, + cpp_free_token_list): Put the dummy token at list->tokens[-1]. + (_cpp_lex_line, _cpp_lex_file): token list is 0-based. + Tue Apr 25 14:06:40 2000 Alexandre Oliva <oliva@lsd.ic.unicamp.br> * config/i386/freebsd.h (INT_ASM_OP): Define. diff --git a/gcc/cpphash.c b/gcc/cpphash.c index 5817e964..fe594a2 100644 --- a/gcc/cpphash.c +++ b/gcc/cpphash.c @@ -459,7 +459,8 @@ collect_objlike_expansion (pfile, list) default:; } - if (i > 1 && !last_was_paste && (list->tokens[i].flags & HSPACE_BEFORE)) + if (i > 1 && !last_was_paste + && (list->tokens[i].flags & PREV_WHITESPACE)) CPP_PUTC (pfile, ' '); CPP_PUTS (pfile, @@ -571,10 +572,10 @@ collect_funlike_expansion (pfile, list, arglist, replacement) } if (last_token != PASTE && last_token != START - && (list->tokens[i].flags & HSPACE_BEFORE)) + && (list->tokens[i].flags & PREV_WHITESPACE)) CPP_PUTC (pfile, ' '); if (last_token == ARG && CPP_TRADITIONAL (pfile) - && !(list->tokens[i].flags & HSPACE_BEFORE)) + && !(list->tokens[i].flags & PREV_WHITESPACE)) endpat->raw_after = 1; switch (token) @@ -616,7 +617,7 @@ collect_funlike_expansion (pfile, list, arglist, replacement) { int raw_before = (last_token == PASTE || (CPP_TRADITIONAL (pfile) - && !(list->tokens[i].flags & HSPACE_BEFORE))); + && !(list->tokens[i].flags & PREV_WHITESPACE))); add_pat (&pat, &endpat, CPP_WRITTEN (pfile) - last /* nchars */, j /* argno */, @@ -865,7 +866,7 @@ _cpp_create_definition (pfile, list, hp) /* The macro is function-like only if the next character, 
with no intervening whitespace, is '('. */ else if (list->tokens[1].type == CPP_OPEN_PAREN - && ! (list->tokens[1].flags & HSPACE_BEFORE)) + && ! (list->tokens[1].flags & PREV_WHITESPACE)) { struct arglist args; int replacement; @@ -884,7 +885,7 @@ _cpp_create_definition (pfile, list, hp) whitespace after the name (6.10.3 para 3). */ else { - if (! (list->tokens[1].flags & CPP_OPEN_PAREN)) + if (! (list->tokens[1].flags & PREV_WHITESPACE)) cpp_pedwarn (pfile, "The C standard requires whitespace after #define %s", hp->name); diff --git a/gcc/cpphash.h b/gcc/cpphash.h index 78185f2..2d2ea8d 100644 --- a/gcc/cpphash.h +++ b/gcc/cpphash.h @@ -317,5 +317,6 @@ extern void _cpp_scan_line PARAMS ((cpp_reader *, cpp_toklist *)); /* In cpplib.c */ extern int _cpp_handle_directive PARAMS ((cpp_reader *)); extern void _cpp_handle_eof PARAMS ((cpp_reader *)); +extern void _cpp_check_directive PARAMS((cpp_toklist *, cpp_token *)); #endif diff --git a/gcc/cpplex.c b/gcc/cpplex.c index f46b638..3061437 100644 --- a/gcc/cpplex.c +++ b/gcc/cpplex.c @@ -4,6 +4,7 @@ Based on CCCP program by Paul Rubin, June 1986 Adapted to ANSI C, Richard Stallman, Jan 1987 Broken out to separate file, Zack Weinberg, Mar 2000 + Single-pass line tokenization by Neil Booth, April 2000 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the @@ -54,12 +55,15 @@ static void output_line_command PARAMS ((cpp_reader *, cpp_printer *, unsigned int)); static void bump_column PARAMS ((cpp_printer *, unsigned int, unsigned int)); -static void expand_name_space PARAMS ((cpp_toklist *)); +static void expand_name_space PARAMS ((cpp_toklist *, unsigned int)); static void expand_token_space PARAMS ((cpp_toklist *)); static void init_token_list PARAMS ((cpp_reader *, cpp_toklist *, int)); static void pedantic_whitespace PARAMS ((cpp_reader *, U_CHAR *, unsigned int)); +#define auto_expand_name_space(list) \ + expand_name_space 
((list), (list)->name_cap / 2) + /* Re-allocates PFILE->token_buffer so it will hold at least N more chars. */ void @@ -431,12 +435,12 @@ cpp_file_buffer (pfile) /* Expand a token list's string space. */ static void -expand_name_space (list) +expand_name_space (list, len) cpp_toklist *list; -{ - list->name_cap *= 2; - list->namebuf = (unsigned char *) xrealloc (list->namebuf, - list->name_cap); + unsigned int len; +{ + list->name_cap += len; + list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap); } /* Expand the number of tokens in a list. */ @@ -446,36 +450,42 @@ expand_token_space (list) { list->tokens_cap *= 2; list->tokens = (cpp_token *) - xrealloc (list->tokens, list->tokens_cap * sizeof (cpp_token)); + xrealloc (list->tokens - 1, (list->tokens_cap + 1) * sizeof (cpp_token)); + list->tokens++; /* Skip the dummy. */ } -/* Initialise a token list. */ +/* Initialize a token list. We allocate an extra token in front of + the token list, as this allows us to always peek at the previous + token without worrying about underflowing the list. */ static void init_token_list (pfile, list, recycle) cpp_reader *pfile; cpp_toklist *list; int recycle; { - /* Recycling a used list saves 2 free-malloc pairs. */ - if (recycle) + /* Recycling a used list saves 3 free-malloc pairs. */ + if (!recycle) { - list->tokens_used = 0; - list->name_used = 0; - } - else - { - /* Initialise token space. */ - list->tokens_cap = 256; /* 4K on Intel. */ - list->tokens_used = 0; + /* Initialize token space. Put a dummy token before the start + that will fail matches. */ + list->tokens_cap = 256; /* 4K's worth. */ list->tokens = (cpp_token *) - xmalloc (list->tokens_cap * sizeof (cpp_token)); + xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token)); + list->tokens[0].type = CPP_EOF; + list->tokens++; - /* Initialise name space. */ + /* Initialize name space. 
*/ list->name_cap = 1024; - list->name_used = 0; list->namebuf = (unsigned char *) xmalloc (list->name_cap); + + /* Only create a comment space on demand. */ + list->comments_cap = 0; + list->comments = 0; } + list->tokens_used = 0; + list->name_used = 0; + list->comments_used = 0; if (pfile->buffer) list->line = pfile->buffer->lineno; list->dir_handler = 0; @@ -522,7 +532,7 @@ _cpp_scan_line (pfile, list) if (list->tokens_used >= list->tokens_cap) expand_token_space (list); if (list->name_used + len >= list->name_cap) - expand_name_space (list); + auto_expand_name_space (list); if (type == CPP_MACRO) type = CPP_NAME; @@ -530,7 +540,7 @@ _cpp_scan_line (pfile, list) list->tokens_used++; list->tokens[i].type = type; list->tokens[i].col = col; - list->tokens[i].flags = space_before ? HSPACE_BEFORE : 0; + list->tokens[i].flags = space_before ? PREV_WHITESPACE : 0; if (type == CPP_VSPACE) break; @@ -2037,3 +2047,1332 @@ _cpp_init_input_buffer (pfile) pfile->input_buffer = tmp; pfile->input_buffer_len = 8192; } + +#if 0 + +static void expand_comment_space PARAMS ((cpp_toklist *)); +void init_trigraph_map PARAMS ((void)); +static unsigned char* trigraph_replace PARAMS ((cpp_reader *, unsigned char *, + unsigned char *)); +static const unsigned char *backslash_start PARAMS ((cpp_reader *, + const unsigned char *)); +static int skip_block_comment PARAMS ((cpp_reader *)); +static int skip_line_comment PARAMS ((cpp_reader *)); +static void skip_whitespace PARAMS ((cpp_reader *, int)); +static void parse_name PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *)); +static void parse_number PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *)); +static void parse_string PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *, + unsigned int)); +static int trigraph_ok PARAMS ((cpp_reader *, const unsigned char *)); +static void copy_comment PARAMS ((cpp_toklist *, const unsigned char *, + unsigned int, unsigned int, unsigned int)); +void _cpp_lex_line PARAMS ((cpp_reader *, cpp_toklist *)); 
+ +static void _cpp_output_list PARAMS ((cpp_reader *, cpp_toklist *)); + +unsigned int spell_char PARAMS ((unsigned char *, cpp_toklist *, + cpp_token *token)); +unsigned int spell_string PARAMS ((unsigned char *, cpp_toklist *, + cpp_token *token)); +unsigned int spell_comment PARAMS ((unsigned char *, cpp_toklist *, + cpp_token *token)); +unsigned int spell_name PARAMS ((unsigned char *, cpp_toklist *, + cpp_token *token)); +unsigned int spell_other PARAMS ((unsigned char *, cpp_toklist *, + cpp_token *token)); + +typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *, + cpp_token *)); + +/* Macros on a cpp_name. */ +#define INIT_NAME(list, name) \ + do {(name).len = 0; (name).offset = (list)->name_used;} while (0) + +#define IS_DIRECTIVE(list) (list->tokens[0].type == CPP_HASH) +#define COLUMN(cur) ((cur) - buffer->line_base) + +/* Maybe put these in the ISTABLE eventually. */ +#define IS_HSPACE(c) ((c) == ' ' || (c) == '\t') +#define IS_NEWLINE(c) ((c) == '\n' || (c) == '\r') + +/* Handle LF, CR, CR-LF and LF-CR style newlines. Assumes next + character, if any, is in buffer. 
*/ +#define handle_newline(cur, limit, c) \ + do {\ + if ((cur) < (limit) && *(cur) == '\r' + '\n' - c) \ + (cur)++; \ + CPP_BUMP_LINE_CUR (pfile, (cur)); \ + } while (0) + +#define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITESPACE)) +#define PREV_TOKEN_TYPE (cur_token[-1].type) + +#define SPELL_TEXT 0 +#define SPELL_HANDLER 1 +#define SPELL_NONE 2 +#define SPELL_EOL 3 + +#define T(e, s) {SPELL_TEXT, s}, +#define H(e, s) {SPELL_HANDLER, s}, +#define N(e, s) {SPELL_NONE, s}, +#define E(e, s) {SPELL_EOL, s}, + +static const struct token_spelling +{ + char type; + PTR speller; +} token_spellings [N_TTYPES + 1] = {TTYPE_TABLE {0, 0} }; + +#undef T +#undef H +#undef N +#undef E + +static const unsigned char *digraph_spellings [] = {"%:", "%:%:", "<:", + ":>", "<%", "%>"}; + +static void +expand_comment_space (list) + cpp_toklist *list; +{ + if (list->comments_cap == 0) + { + list->comments_cap = 10; + list->comments = (cpp_token *) + xmalloc (list->comments_cap * sizeof (cpp_token)); + } + else + { + list->comments_cap *= 2; + list->comments = (cpp_token *) + xrealloc (list->comments, list->comments_cap); + } +} + +void +cpp_free_token_list (list) + cpp_toklist *list; +{ + if (list->comments) + free (list->comments); + free (list->tokens - 1); + free (list->namebuf); + free (list); +} + +static char trigraph_map[256]; + +void +init_trigraph_map () +{ + trigraph_map['='] = '#'; + trigraph_map['('] = '['; + trigraph_map[')'] = ']'; + trigraph_map['/'] = '\\'; + trigraph_map['\''] = '^'; + trigraph_map['<'] = '{'; + trigraph_map['>'] = '}'; + trigraph_map['!'] = '|'; + trigraph_map['-'] = '~'; +} + +/* Call when a trigraph is encountered. It warns if necessary, and + returns true if the trigraph should be honoured. END is the third + character of a trigraph in the input stream. 
*/ +static int +trigraph_ok (pfile, end) + cpp_reader *pfile; + const unsigned char *end; +{ + int accept = CPP_OPTION (pfile, trigraphs); + + if (CPP_OPTION (pfile, warn_trigraphs)) + { + unsigned int col = end - 1 - pfile->buffer->line_base; + if (accept) + cpp_warning_with_line (pfile, pfile->buffer->lineno, col, + "trigraph ??%c converted to %c", + (int) *end, (int) trigraph_map[*end]); + else + cpp_warning_with_line (pfile, pfile->buffer->lineno, col, + "trigraph ??%c ignored", (int) *end); + } + return accept; +} + +/* Scan a string for trigraphs, warning or replacing them inline as + appropriate. When parsing a string, we must call this routine + before processing a newline character (if trigraphs are enabled), + since the newline might be escaped by a preceding backslash + trigraph sequence. Returns a pointer to the end of the name after + replacement. */ + +static unsigned char* +trigraph_replace (pfile, src, limit) + cpp_reader *pfile; + unsigned char *src; + unsigned char* limit; +{ + unsigned char *dest; + + /* Starting with src[1], find two consecutive '?'. The case of no + trigraphs is streamlined. */ + + for (; src + 1 < limit; src += 2) + { + if (src[0] != '?') + continue; + + /* Make src point to the 1st (NOT 2nd) of two consecutive '?'s. */ + if (src[-1] == '?') + src--; + else if (src + 2 == limit || src[1] != '?') + continue; + + /* Check if it really is a trigraph. */ + if (trigraph_map[src[2]] == 0) + continue; + + dest = src; + goto trigraph_found; + } + return limit; + + /* Now we have a trigraph, we need to scan the remaining buffer, and + copy-shifting its contents left if replacement is enabled. */ + for (; src + 2 < limit; dest++, src++) + if ((*dest = *src) == '?' && src[1] == '?' && trigraph_map[src[2]]) + { + trigraph_found: + src += 2; + if (trigraph_ok (pfile, pfile->buffer->cur - (limit - src))) + *dest = trigraph_map[*src]; + } + + /* Copy remaining (at most 2) characters. 
*/ + while (src < limit) + *dest++ = *src++; + return dest; +} + +/* If CUR is a backslash or the end of a trigraphed backslash, return + a pointer to its beginning, otherwise NULL. We don't read beyond + the buffer start, because there is the start of the comment in the + buffer. */ +static const unsigned char * +backslash_start (pfile, cur) + cpp_reader *pfile; + const unsigned char *cur; +{ + if (cur[0] == '\\') + return cur; + if (cur[0] == '/' && cur[-1] == '?' && cur[-2] == '?' + && trigraph_ok (pfile, cur)) + return cur - 2; + return 0; +} + +/* Skip a C-style block comment. This is probably the trickiest + handler. We find the end of the comment by seeing if an asterisk + is before every '/' we encounter. The nasty complication is that a + previous asterisk may be separated by one or more escaped newlines. + Returns non-zero if comment terminated by EOF, zero otherwise. */ +static int +skip_block_comment (pfile) + cpp_reader *pfile; +{ + cpp_buffer *buffer = pfile->buffer; + const unsigned char *char_after_star = 0; + register const unsigned char *cur = buffer->cur; + int seen_eof = 0; + + /* Inner loop would think the comment has ended if the first comment + character is a '/'. Avoid this and keep the inner loop clean by + skipping such a character. */ + if (cur < buffer->rlimit && cur[0] == '/') + cur++; + + for (; cur < buffer->rlimit; ) + { + unsigned char c = *cur++; + + /* People like decorating comments with '*', so check for + '/' instead for efficiency. */ + if (c == '/') + { + if (cur[-2] == '*' || cur - 1 == char_after_star) + goto out; + + /* Warn about potential nested comments, but not when + the final character inside the comment is a '/'. + Don't bother to get it right across escaped newlines. 
+ if (CPP_OPTION (pfile, warn_comments) && cur + 1 < buffer->rlimit + && cur[0] == '*' && cur[1] != '/') + { + buffer->cur = cur; + cpp_warning (pfile, "'/*' within comment"); + } + } + else if (IS_NEWLINE(c)) + { + const unsigned char* bslash = backslash_start (pfile, cur - 2); + + handle_newline (cur, buffer->rlimit, c); + /* Work correctly if there is an asterisk before an + arbitrarily long sequence of escaped newlines. */ + if (bslash && (bslash[-1] == '*' || bslash == char_after_star)) + char_after_star = cur; + else + char_after_star = 0; + } + } + seen_eof = 1; + + out: + buffer->cur = cur; + return seen_eof; +} + +/* Skip a C++ or Chill line comment. Handles escaped newlines. + Returns non-zero if a multiline comment. */ +static int +skip_line_comment (pfile) + cpp_reader *pfile; +{ + cpp_buffer *buffer = pfile->buffer; + register const unsigned char *cur = buffer->cur; + int multiline = 0; + + for (; cur < buffer->rlimit; ) + { + unsigned char c = *cur++; + + if (IS_NEWLINE (c)) + { + /* Check for a (trigraph?) backslash escaping the newline. */ + if (!backslash_start (pfile, cur - 2)) + goto out; + multiline = 1; + handle_newline (cur, buffer->rlimit, c); + } + } + cur++; + + out: + buffer->cur = cur - 1; /* Leave newline for caller. */ + return multiline; +} + +/* Skips whitespace, stopping at next non-whitespace character. */ +static void +skip_whitespace (pfile, in_directive) + cpp_reader *pfile; + int in_directive; +{ + cpp_buffer *buffer = pfile->buffer; + register const unsigned char *cur = buffer->cur; + unsigned short null_count = 0; + + for (; cur < buffer->rlimit; ) + { + unsigned char c = *cur++; + + if (IS_HSPACE(c)) /* FIXME: Fix ISTABLE. */ + continue; + if (!is_space(c) || IS_NEWLINE (c)) /* Main loop handles newlines. */ + goto out; + if (c == '\0') + null_count++; + /* Must be '\f' or '\v' */ + else if (in_directive && CPP_PEDANTIC (pfile)) + cpp_pedwarn (pfile, "%s in preprocessing directive", + c == '\f' ? 
"formfeed" : "vertical tab"); + } + cur++; + + out: + buffer->cur = cur - 1; + if (null_count) + cpp_warning (pfile, null_count > 1 ? "embedded null characters ignored" + : "embedded null character ignored"); +} + +/* Parse (append) an identifier. */ +static void +parse_name (pfile, list, name) + cpp_reader *pfile; + cpp_toklist *list; + cpp_name *name; +{ + const unsigned char *name_limit; + unsigned char *namebuf; + cpp_buffer *buffer = pfile->buffer; + register const unsigned char *cur = buffer->cur; + + expanded: + name_limit = list->namebuf + list->name_cap; + namebuf = list->namebuf + list->name_used; + + for (; cur < buffer->rlimit && namebuf < name_limit; ) + { + unsigned char c = *namebuf = *cur; /* Copy a single char. */ + + if (! is_idchar(c)) + goto out; + namebuf++; + cur++; + if (c == '$' && CPP_PEDANTIC (pfile)) + { + buffer->cur = cur; + cpp_pedwarn (pfile, "'$' character in identifier"); + } + } + + /* Run out of name space? */ + if (cur < buffer->rlimit) + { + list->name_used = namebuf - list->namebuf; + auto_expand_name_space (list); + goto expanded; + } + + out: + buffer->cur = cur; + name->len = namebuf - (list->namebuf + name->offset); + list->name_used = namebuf - list->namebuf; +} + +/* Parse (append) a number. */ + +#define VALID_SIGN(c, prevc) \ + (((c) == '+' || (c) == '-') && \ + ((prevc) == 'e' || (prevc) == 'E' \ + || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89)))) + +static void +parse_number (pfile, list, name) + cpp_reader *pfile; + cpp_toklist *list; + cpp_name *name; +{ + const unsigned char *name_limit; + unsigned char *namebuf; + cpp_buffer *buffer = pfile->buffer; + register const unsigned char *cur = buffer->cur; + + expanded: + name_limit = list->namebuf + list->name_cap; + namebuf = list->namebuf + list->name_used; + + for (; cur < buffer->rlimit && namebuf < name_limit; ) + { + unsigned char c = *namebuf = *cur; /* Copy a single char. 
*/ + + /* Perhaps we should accept '$' here if we accept it for + identifiers. We know namebuf[-1] is safe, because for c to + be a sign we must have pushed at least one character. */ + if (!is_numchar (c) && c != '.' && ! VALID_SIGN (c, namebuf[-1])) + goto out; + + namebuf++; + cur++; + } + + /* Run out of name space? */ + if (cur < buffer->rlimit) + { + list->name_used = namebuf - list->namebuf; + auto_expand_name_space (list); + goto expanded; + } + + out: + buffer->cur = cur; + name->len = namebuf - (list->namebuf + name->offset); + list->name_used = namebuf - list->namebuf; +} + +/* Places a string terminated by an unescaped TERMINATOR into a + cpp_name, which should be expandable and thus at the top of the + list's stack. Handles embedded trigraphs, if necessary, and + escaped newlines. + + Can be used for character constants (terminator = '\''), string + constants ('"'), angled headers ('>') and assertions (')'). */ + +static void +parse_string (pfile, list, name, terminator) + cpp_reader *pfile; + cpp_toklist *list; + cpp_name *name; + unsigned int terminator; +{ + cpp_buffer *buffer = pfile->buffer; + register const unsigned char *cur = buffer->cur; + const unsigned char *name_limit; + unsigned char *namebuf; + unsigned int null_count = 0; + int trigraphed_len = 0; + + expanded: + name_limit = list->namebuf + list->name_cap; + namebuf = list->namebuf + list->name_used; + + for (; cur < buffer->rlimit && namebuf < name_limit; ) + { + unsigned int c = *namebuf++ = *cur++; /* Copy a single char. */ + + if (c == '\0') + null_count++; + else if (c == terminator || IS_NEWLINE (c)) + { + unsigned char* name_start = list->namebuf + name->offset; + + /* Needed for trigraph_replace and multiline string warning. */ + buffer->cur = cur; + + /* Scan for trigraphs before checking if backslash-escaped. 
*/ + if (CPP_OPTION (pfile, trigraphs) + || CPP_OPTION (pfile, warn_trigraphs)) + { + namebuf = trigraph_replace (pfile, name_start + trigraphed_len, + namebuf); + trigraphed_len = namebuf - 2 - (name_start + trigraphed_len); + if (trigraphed_len < 0) + trigraphed_len = 0; + } + + namebuf--; /* Drop the newline / terminator from the name. */ + if (IS_NEWLINE (c)) + { + /* Drop a backslash newline, and continue. */ + if (namebuf[-1] == '\\') + { + handle_newline (cur, buffer->rlimit, c); + namebuf--; + continue; + } + + cur--; + + /* In Fortran and assembly language, silently terminate + strings of either variety at end of line. This is a + kludge around not knowing where comments are in these + languages. */ + if (CPP_OPTION (pfile, lang_fortran) + || CPP_OPTION (pfile, lang_asm)) + goto out; + + /* Character constants, headers and asserts may not + extend over multiple lines. In Standard C, neither + may strings. We accept multiline strings as an + extension, but not in directives. */ + if (terminator != '"' || IS_DIRECTIVE (list)) + goto unterminated; + + cur++; /* Move forwards again. */ + + if (pfile->multiline_string_line == 0) + { + pfile->multiline_string_line = list->line; + if (CPP_PEDANTIC (pfile)) + cpp_pedwarn (pfile, "multi-line string constant"); + } + + *namebuf++ = '\n'; + handle_newline (cur, buffer->rlimit, c); + } + else + { + unsigned char *temp; + + /* An odd number of consecutive backslashes represents + an escaped terminator. */ + temp = namebuf - 1; + while (temp >= name_start && *temp == '\\') + temp--; + + if ((namebuf - temp) & 1) + goto out; + namebuf++; + } + } + } + + /* Run out of name space? */ + if (cur < buffer->rlimit) + { + list->name_used = namebuf - list->namebuf; + auto_expand_name_space (list); + goto expanded; + } + + /* We may not have trigraph-replaced the input for this code path, + but as the input is in error by being unterminated we don't + bother. Prevent warnings about no newlines at EOF. 
*/ + if (IS_NEWLINE(cur[-1])) + cur--; + + unterminated: + cpp_error (pfile, "missing terminating %c character", (int) terminator); + + if (terminator == '\"' && pfile->multiline_string_line != list->line + && pfile->multiline_string_line != 0) + { + cpp_error_with_line (pfile, pfile->multiline_string_line, -1, + "possible start of unterminated string literal"); + pfile->multiline_string_line = 0; + } + + out: + buffer->cur = cur; + name->len = namebuf - (list->namebuf + name->offset); + list->name_used = namebuf - list->namebuf; + + if (null_count > 0) + cpp_warning (pfile, (null_count > 1 ? "null characters preserved" + : "null character preserved")); +} + +/* The character C helps us distinguish comment types: '*' = C style, + '-' = Chill-style and '/' = C++ style. For code simplicity, the + stored comment includes any C-style comment terminator. */ +static void +copy_comment (list, from, len, tok_no, type) + cpp_toklist *list; + const unsigned char *from; + unsigned int len; + unsigned int tok_no; + unsigned int type; +{ + cpp_token *comment; + + if (list->comments_used == list->comments_cap) + expand_comment_space (list); + + if (list->name_used + len > list->name_cap) + expand_name_space (list, len); + + comment = &list->comments[list->comments_used++]; + comment->type = type; + comment->aux = tok_no; + comment->val.name.len = len; + comment->val.name.offset = list->name_used; + + memcpy (list->namebuf + list->name_used, from, len); + list->name_used += len; +} + +/* + * The tokenizer's main loop. Returns a token list, representing a + * logical line in the input file, terminated with a CPP_VSPACE + * token. On EOF, a token list containing the single CPP_EOF token + * is returned. + * + * Implementation relies almost entirely on lookback, rather than + * looking forwards. This means that tokenization requires just + * a single pass of the file, even in the presence of trigraphs and + * escaped newlines, providing significant performance benefits. 
+ * Trigraph overhead is negligible if they are disabled, and low + * even when enabled. + */ + +#define PUSH_TOKEN(ttype) cur_token++->type = ttype +#define REVISE_TOKEN(ttype) cur_token[-1].type = ttype +#define BACKUP_TOKEN(ttype) (--cur_token)->type = ttype +#define BACKUP_DIGRAPH(ttype) do { \ + BACKUP_TOKEN(ttype); cur_token->flags |= DIGRAPH;} while (0) + +void +_cpp_lex_line (pfile, list) + cpp_reader *pfile; + cpp_toklist *list; +{ + cpp_token *cur_token, *token_limit; + cpp_buffer *buffer = pfile->buffer; + register const unsigned char *cur = buffer->cur; + unsigned char flags = 0; + + expanded: + token_limit = list->tokens + list->tokens_cap; + cur_token = list->tokens + list->tokens_used; + + for (; cur < buffer->rlimit && cur_token < token_limit;) + { + unsigned char c = *cur++; + + /* Optimize whitespace skipping, in particular the case of a + single whitespace character, as every other token is probably + whitespace. (' ' '\t' '\v' '\f' '\0'). */ + if (is_hspace ((unsigned int) c)) + { + if (c == '\0' || (cur < buffer->rlimit && is_hspace (*cur))) + { + buffer->cur = cur - (c == '\0'); /* Get the null warning. */ + skip_whitespace (pfile, IS_DIRECTIVE (list)); + cur = buffer->cur; + } + flags = PREV_WHITESPACE; + if (cur == buffer->rlimit) + break; + c = *cur++; + } + + /* Initialize current token. Its type is set in the switch. */ + cur_token->col = COLUMN (cur); + cur_token->flags = flags; + flags = 0; + + switch (c) + { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + /* Prepend an immediately previous CPP_DOT token. */ + if (PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ()) + { + cur_token--; + if (list->name_cap == list->name_used) + auto_expand_name_space (list); + + cur_token->val.name.len = 1; + cur_token->val.name.offset = list->name_used; + list->namebuf[list->name_used++] = '.'; + } + else + INIT_NAME (list, cur_token->val.name); + cur--; /* Backup character. 
*/ + + continue_number: + buffer->cur = cur; + parse_number (pfile, list, &cur_token->val.name); + cur = buffer->cur; + + PUSH_TOKEN (CPP_NUMBER); /* Number not yet interpreted. */ + break; + + letter: + case '_': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': + case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': + case 's': case 't': case 'u': case 'v': case 'w': case 'x': + case 'y': case 'z': + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': + INIT_NAME (list, cur_token->val.name); + cur--; /* Backup character. */ + cur_token->type = CPP_NAME; /* Identifier, macro etc. */ + + continue_name: + buffer->cur = cur; + parse_name (pfile, list, &cur_token->val.name); + cur = buffer->cur; + + /* Find handler for newly created / extended directive. */ + if (IS_DIRECTIVE (list) && cur_token == &list->tokens[1]) + _cpp_check_directive (list, cur_token); + cur_token++; + break; + + case '\'': + /* Fall through. */ + case '\"': + cur_token->type = c == '\'' ? CPP_CHAR : CPP_STRING; + /* Do we have a wide string? */ + if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN () + && cur_token[-1].val.name.len == 1 + && TOK_NAME (list, cur_token - 1)[0] == 'L' + && !CPP_TRADITIONAL (pfile)) + { + /* No need for 'L' any more. */ + list->name_used--; + (--cur_token)->type = (c == '\'' ? CPP_WCHAR : CPP_WSTRING); + } + + do_parse_string: + /* Here c is one of ' " > or ). 
*/ + INIT_NAME (list, cur_token->val.name); + buffer->cur = cur; + parse_string (pfile, list, &cur_token->val.name, c); + cur = buffer->cur; + cur_token++; + break; + + case '/': + cur_token->type = CPP_DIV; + if (IMMED_TOKEN ()) + { + if (PREV_TOKEN_TYPE == CPP_DIV) + { + /* We silently allow C++ comments in system headers, + irrespective of conformance mode, because lots of + broken systems do that and trying to clean it up + in fixincludes is a nightmare. */ + if (buffer->system_header_p) + goto do_line_comment; + else if (CPP_OPTION (pfile, cplusplus_comments)) + { + if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile) + && ! buffer->warned_cplusplus_comments) + { + buffer->cur = cur; + cpp_pedwarn (pfile, + "C++ style comments are not allowed in ISO C89"); + cpp_pedwarn (pfile, + "(this will be reported only once per input file)"); + buffer->warned_cplusplus_comments = 1; + } + do_line_comment: + buffer->cur = cur; + if (cur[-2] != c) + cpp_warning (pfile, + "comment start split across lines"); + if (skip_line_comment (pfile)) + cpp_error_with_line (pfile, list->line, + cur_token[-1].col, + "multi-line comment"); + if (!CPP_OPTION (pfile, discard_comments)) + copy_comment (list, cur, buffer->cur - cur, + cur_token - 1 - list->tokens, c == '/' + ? CPP_CPP_COMMENT: CPP_CHILL_COMMENT); + cur = buffer->cur; + + /* Back-up to first '-' or '/'. 
*/ + cur_token -= 2; + if (!CPP_OPTION (pfile, traditional)) + flags = PREV_WHITESPACE; + } + } + } + cur_token++; + break; + + case '*': + cur_token->type = CPP_MULT; + if (IMMED_TOKEN ()) + { + if (PREV_TOKEN_TYPE == CPP_DIV) + { + buffer->cur = cur; + if (cur[-2] != '/') + cpp_warning (pfile, + "comment start '/*' split across lines"); + if (skip_block_comment (pfile)) + cpp_error_with_line (pfile, list->line, cur_token[-1].col, + "unterminated comment"); + else if (buffer->cur[-2] != '*') + cpp_warning (pfile, + "comment end '*/' split across lines"); + if (!CPP_OPTION (pfile, discard_comments)) + copy_comment (list, cur, buffer->cur - cur, + cur_token - 1 - list->tokens, CPP_C_COMMENT); + cur = buffer->cur; + + cur_token -= 2; + if (!CPP_OPTION (pfile, traditional)) + flags = PREV_WHITESPACE; + } + else if (CPP_OPTION (pfile, cplusplus)) + { + /* In C++, there are .* and ->* operators. */ + if (PREV_TOKEN_TYPE == CPP_DEREF) + BACKUP_TOKEN (CPP_DEREF_STAR); + else if (PREV_TOKEN_TYPE == CPP_DOT) + BACKUP_TOKEN (CPP_DOT_STAR); + } + } + cur_token++; + break; + + case '\n': + case '\r': + handle_newline (cur, buffer->rlimit, c); + if (PREV_TOKEN_TYPE != CPP_BACKSLASH || !IMMED_TOKEN ()) + { + if (PREV_TOKEN_TYPE == CPP_BACKSLASH) + { + buffer->cur = cur; + cpp_warning (pfile, + "backslash and newline separated by space"); + } + PUSH_TOKEN (CPP_VSPACE); + goto out; + } + /* Remove the escaped newline. Then continue to process + any interrupted name or number. 
*/ + cur_token--; + if (IMMED_TOKEN ()) + { + cur_token--; + if (cur_token->type == CPP_NAME) + goto continue_name; + else if (cur_token->type == CPP_NUMBER) + goto continue_number; + cur_token++; + } + break; + + case '-': + if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_MINUS) + { + if (CPP_OPTION (pfile, chill)) + goto do_line_comment; + REVISE_TOKEN (CPP_MINUS_MINUS); + } + else + PUSH_TOKEN (CPP_MINUS); + break; + + /* The digraph flag checking ensures that ## and %:%: + are interpreted as CPP_PASTE, but #%: and %:# are not. */ + make_hash: + case '#': + if (PREV_TOKEN_TYPE == CPP_HASH && IMMED_TOKEN () + && ((cur_token->flags ^ cur_token[-1].flags) & DIGRAPH) == 0) + REVISE_TOKEN (CPP_PASTE); + else + PUSH_TOKEN (CPP_HASH); + break; + + case ':': + cur_token->type = CPP_COLON; + if (IMMED_TOKEN ()) + { + if (PREV_TOKEN_TYPE == CPP_COLON + && CPP_OPTION (pfile, cplusplus)) + BACKUP_TOKEN (CPP_SCOPE); + /* Digraph: "<:" is a '[' */ + else if (PREV_TOKEN_TYPE == CPP_LESS) + BACKUP_DIGRAPH (CPP_OPEN_SQUARE); + /* Digraph: "%:" is a '#' */ + else if (PREV_TOKEN_TYPE == CPP_MOD) + { + (--cur_token)->flags |= DIGRAPH; + goto make_hash; + } + } + cur_token++; + break; + + case '&': + if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_AND) + REVISE_TOKEN (CPP_AND_AND); + else + PUSH_TOKEN (CPP_AND); + break; + + make_or: + case '|': + if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_OR) + REVISE_TOKEN (CPP_OR_OR); + else + PUSH_TOKEN (CPP_OR); + break; + + case '+': + if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_PLUS) + REVISE_TOKEN (CPP_PLUS_PLUS); + else + PUSH_TOKEN (CPP_PLUS); + break; + + case '=': + /* This relies on equidistance of "?=" and "?" tokens. 
*/ + if (IMMED_TOKEN () && PREV_TOKEN_TYPE <= CPP_LAST_EQ) + REVISE_TOKEN (PREV_TOKEN_TYPE + (CPP_EQ_EQ - CPP_EQ)); + else + PUSH_TOKEN (CPP_EQ); + break; + + case '>': + cur_token->type = CPP_GREATER; + if (IMMED_TOKEN ()) + { + if (PREV_TOKEN_TYPE == CPP_GREATER) + BACKUP_TOKEN (CPP_RSHIFT); + else if (PREV_TOKEN_TYPE == CPP_MINUS) + BACKUP_TOKEN (CPP_DEREF); + /* Digraph: ":>" is a ']' */ + else if (PREV_TOKEN_TYPE == CPP_COLON) + BACKUP_DIGRAPH (CPP_CLOSE_SQUARE); + /* Digraph: "%>" is a '}' */ + else if (PREV_TOKEN_TYPE == CPP_MOD) + BACKUP_DIGRAPH (CPP_CLOSE_BRACE); + } + cur_token++; + break; + + case '<': + if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS) + { + REVISE_TOKEN (CPP_LSHIFT); + break; + } + /* Is this the beginning of a header name? */ + if (list->dir_flags & SYNTAX_INCLUDE) + { + c = '>'; /* Terminator. */ + cur_token->type = CPP_HEADER_NAME; + goto do_parse_string; + } + PUSH_TOKEN (CPP_LESS); + break; + + case '%': + /* Digraph: "<%" is a '{' */ + cur_token->type = CPP_MOD; + if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS) + BACKUP_DIGRAPH (CPP_OPEN_BRACE); + cur_token++; + break; + + case ')': + PUSH_TOKEN (CPP_CLOSE_PAREN); + break; + + case '(': + /* Is this the beginning of an assertion string? */ + if (list->dir_flags & SYNTAX_ASSERT) + { + c = ')'; /* Terminator. */ + cur_token->type = CPP_ASSERTION; + goto do_parse_string; + } + PUSH_TOKEN (CPP_OPEN_PAREN); + break; + + make_complement: + case '~': + PUSH_TOKEN (CPP_COMPL); + break; + + case '?': + if (cur + 1 < buffer->rlimit && *cur == '?' + && trigraph_map[cur[1]] && trigraph_ok (pfile, cur + 1)) + { + /* Handle trigraph. 
*/ + cur++; + switch (*cur++) + { + case '(': goto make_open_square; + case ')': goto make_close_square; + case '<': goto make_open_brace; + case '>': goto make_close_brace; + case '=': goto make_hash; + case '!': goto make_or; + case '-': goto make_complement; + case '/': goto make_backslash; + case '\'': goto make_xor; + } + } + if (IMMED_TOKEN () && CPP_OPTION (pfile, cplusplus)) + { + /* GNU C++ defines <? and >? operators. */ + if (PREV_TOKEN_TYPE == CPP_LESS) + { + REVISE_TOKEN (CPP_MIN); + break; + } + else if (PREV_TOKEN_TYPE == CPP_GREATER) + { + REVISE_TOKEN (CPP_MAX); + break; + } + } + PUSH_TOKEN (CPP_QUERY); + break; + + case '.': + if (PREV_TOKEN_TYPE == CPP_DOT && cur_token[-2].type == CPP_DOT + && IMMED_TOKEN () + && !(cur_token[-1].flags & PREV_WHITESPACE)) + { + cur_token -= 2; + PUSH_TOKEN (CPP_ELLIPSIS); + } + else + PUSH_TOKEN (CPP_DOT); + break; + + make_xor: + case '^': PUSH_TOKEN (CPP_XOR); break; + make_open_brace: + case '{': PUSH_TOKEN (CPP_OPEN_BRACE); break; + make_close_brace: + case '}': PUSH_TOKEN (CPP_CLOSE_BRACE); break; + make_open_square: + case '[': PUSH_TOKEN (CPP_OPEN_SQUARE); break; + make_close_square: + case ']': PUSH_TOKEN (CPP_CLOSE_SQUARE); break; + make_backslash: + case '\\': PUSH_TOKEN (CPP_BACKSLASH); break; + case '!': PUSH_TOKEN (CPP_NOT); break; + case ',': PUSH_TOKEN (CPP_COMMA); break; + case ';': PUSH_TOKEN (CPP_SEMICOLON); break; + + case '$': + if (CPP_OPTION (pfile, dollars_in_ident)) + goto letter; + /* Fall through */ + default: + cur_token->aux = c; + PUSH_TOKEN (CPP_OTHER); + break; + } + } + + /* Run out of token space? */ + if (cur_token == token_limit) + { + list->tokens_used = cur_token - list->tokens; + expand_token_space (list); + goto expanded; + } + + cur_token->type = CPP_EOF; + cur_token->flags = flags; + + if (cur_token != &list->tokens[0]) + { + /* Next call back will get just a CPP_EOF. 
*/ + buffer->cur = cur; + cpp_warning (pfile, "no newline at end of file"); + PUSH_TOKEN (CPP_VSPACE); + } + + out: + buffer->cur = cur; + + list->tokens_used = cur_token - list->tokens; + + /* FIXME: take this check out and put it in the caller. + list->directive == 0 indicates an unknown directive (but null + directive is OK). This is the first time we can be sure the + directive is invalid, and thus warn about it, because it might + have been split by escaped newlines. Also, don't complain about + invalid directives in assembly source, we don't know where the + comments are, and # may introduce assembler pseudo-ops. */ + + if (IS_DIRECTIVE (list) && list->dir_handler == 0 + && list->tokens[1].type != CPP_VSPACE + && !CPP_OPTION (pfile, lang_asm)) + cpp_error_with_line (pfile, list->line, list->tokens[1].col, + "invalid preprocessing directive"); +} + +/* Token spelling functions. Used for output of a preprocessed file, + stringizing and token pasting. They all assume sufficient buffer + is allocated, and return exactly how much they used. */ + +/* Needs buffer of 3 + len. */ +unsigned int +spell_char (buffer, list, token) + unsigned char *buffer; + cpp_toklist *list; + cpp_token *token; +{ + unsigned char* orig_buff = buffer; + size_t len; + + if (token->type == CPP_WCHAR) + *buffer++ = 'L'; + *buffer++ = '\''; + + len = token->val.name.len; + memcpy (buffer, TOK_NAME (list, token), len); + buffer += len; + *buffer++ = '\''; + return buffer - orig_buff; +} + +/* Needs buffer of 3 + len. */ +unsigned int +spell_string (buffer, list, token) + unsigned char *buffer; + cpp_toklist *list; + cpp_token *token; +{ + unsigned char* orig_buff = buffer; + size_t len; + + if (token->type == CPP_WSTRING) + *buffer++ = 'L'; + *buffer++ = '"'; + + len = token->val.name.len; + memcpy (buffer, TOK_NAME (list, token), len); + buffer += len; + *buffer++ = '"'; + return buffer - orig_buff; +} + +/* Needs buffer of len + 2. 
*/ +unsigned int +spell_comment (buffer, list, token) + unsigned char *buffer; + cpp_toklist *list; + cpp_token *token; +{ + size_t len; + + if (token->type == CPP_C_COMMENT) + { + *buffer++ = '/'; + *buffer++ = '*'; + } + else if (token->type == CPP_CPP_COMMENT) + { + *buffer++ = '/'; + *buffer++ = '/'; + } + else + { + *buffer++ = '-'; + *buffer++ = '-'; + } + + len = token->val.name.len; + memcpy (buffer, TOK_NAME (list, token), len); + + return len + 2; +} + +/* Needs buffer of len. */ +unsigned int +spell_name (buffer, list, token) + unsigned char *buffer; + cpp_toklist *list; + cpp_token *token; +{ + size_t len; + + len = token->val.name.len; + memcpy (buffer, TOK_NAME (list, token), len); + buffer += len; + + return len; +} + +/* Needs buffer of 1. */ +unsigned int +spell_other (buffer, list, token) + unsigned char *buffer; + cpp_toklist *list ATTRIBUTE_UNUSED; + cpp_token *token; +{ + *buffer++ = token->aux; + return 1; +} + +void +_cpp_lex_file (pfile) + cpp_reader* pfile; +{ + int recycle; + cpp_toklist* list; + + init_trigraph_map (); + list = (cpp_toklist *) xmalloc (sizeof (cpp_toklist)); + + for (recycle = 0; ;) + { + init_token_list (pfile, list, recycle); + recycle = 1; + + _cpp_lex_line (pfile, list); + if (list->tokens[0].type == CPP_EOF) + break; + + if (list->dir_handler) + { + if (list->dir_handler (pfile)) + { + list = (cpp_toklist *) xmalloc (sizeof (cpp_toklist)); + recycle = 0; + } + } + else + _cpp_output_list (pfile, list); + } +} + +static void +_cpp_output_list (pfile, list) + cpp_reader *pfile; + cpp_toklist *list; +{ + unsigned int comment_no = 0; + cpp_token *token, *comment_token = 0; + + if (list->comments_used > 0) + comment_token = list->tokens + list->comments[0].aux; + + CPP_RESERVE (pfile, 2); /* Always have room for " \n". */ + for (token = &list->tokens[0];; token++) + { + if (token->flags & PREV_WHITESPACE) + { + /* Output comments if -C. Otherwise a space will do. 
*/ + if (token == comment_token) + { + cpp_token *comment = &list->comments[comment_no]; + do + { + /* Longest wrapper is 4. */ + CPP_RESERVE (pfile, 4 + 2 + comment->val.name.len); + pfile->limit += spell_comment (pfile->limit, list, comment); + comment_no++, comment++; + if (comment_no == list->comments_used) + break; + comment_token = comment->aux + list->tokens; + } + while (comment_token == token); + } + else + CPP_PUTC_Q (pfile, ' '); + } + + switch (token_spellings[token->type].type) + { + case SPELL_TEXT: + { + const unsigned char *spelling; + unsigned char c; + + CPP_RESERVE (pfile, 4 + 2); /* Longest is 4. */ + if (token->flags & DIGRAPH) + spelling = digraph_spellings [token->type - CPP_FIRST_DIGRAPH]; + else + spelling = token_spellings[token->type].speller; + + while ((c = *spelling++) != '\0') + CPP_PUTC_Q (pfile, c); + } + break; + + case SPELL_HANDLER: + { + speller s; + + s = (speller) token_spellings[token->type].speller; + /* Longest wrapper is 4. */ + CPP_RESERVE (pfile, 4 + 2 + token->val.name.len); + pfile->limit += s (pfile->limit, list, token); + } + break; + + case SPELL_EOL: + CPP_PUTC_Q (pfile, '\n'); + return; + + case SPELL_NONE: + cpp_error (pfile, "Unwriteable token"); + break; + } + } +} + +#endif diff --git a/gcc/cpplib.c b/gcc/cpplib.c index 5bb5162..2d466ff 100644 --- a/gcc/cpplib.c +++ b/gcc/cpplib.c @@ -150,6 +150,29 @@ DIRECTIVE_TABLE #undef D #undef DIRECTIVE_TABLE +/* Check if a token's name matches that of a known directive. Put in + this file to save exporting dtable and other unneeded information. 
*/ +void +_cpp_check_directive (list, token) + cpp_toklist *list; + cpp_token *token; +{ + const char *name = list->namebuf + token->val.name.offset; + size_t len = token->val.name.len; + unsigned int i; + + list->dir_handler = 0; + list->dir_flags = 0; + + for (i = 0; i < N_DIRECTIVES; i++) + if (dtable[i].length == len && !strncmp (dtable[i].name, name, len)) + { + list->dir_handler = dtable[i].func; + list->dir_flags = dtable[i].flags; + break; + } +} + /* Handle a possible # directive. '#' has already been read. */ diff --git a/gcc/cpplib.h b/gcc/cpplib.h index 764d8e6..aca02a9 100644 --- a/gcc/cpplib.h +++ b/gcc/cpplib.h @@ -34,13 +34,26 @@ typedef struct cpp_options cpp_options; typedef struct cpp_printer cpp_printer; typedef struct cpp_token cpp_token; typedef struct cpp_toklist cpp_toklist; +typedef struct cpp_name cpp_name; + +/* The first two groups, apart from '=', can appear in preprocessor + expressions. This allows a lookup table to be implemented in + _cpp_parse_expr. + + The first group, to CPP_LAST_EQ, can be immediately followed by an + '='. The lexer needs operators ending in '=', like ">>=", to be in + the same order as their counterparts without the '=', like ">>". */ + +/* Positions in the table. */ +#define CPP_LAST_EQ CPP_LSHIFT +#define CPP_FIRST_DIGRAPH CPP_HASH - /* Put operators that can appear in a preprocessor expression first. - This allows a lookup table to be implemented in _cpp_parse_expr. - Ordering within this group is currently not significant, apart - from those ending in '=' being at the end. 
*/ #define TTYPE_TABLE \ - T(CPP_PLUS = 0, "+") /* math */ \ + T(CPP_EQ = 0, "=") \ + T(CPP_NOT, "!") \ + T(CPP_GREATER, ">") /* compare */ \ + T(CPP_LESS, "<") \ + T(CPP_PLUS, "+") /* math */ \ T(CPP_MINUS, "-") \ T(CPP_MULT, "*") \ T(CPP_DIV, "/") \ @@ -51,22 +64,19 @@ typedef struct cpp_toklist cpp_toklist; T(CPP_COMPL, "~") \ T(CPP_RSHIFT, ">>") \ T(CPP_LSHIFT, "<<") \ - T(CPP_NOT, "!") /* logicals */ \ - T(CPP_AND_AND, "&&") \ +\ + T(CPP_AND_AND, "&&") /* logical */ \ T(CPP_OR_OR, "||") \ T(CPP_QUERY, "?") \ T(CPP_COLON, ":") \ T(CPP_COMMA, ",") /* grouping */ \ T(CPP_OPEN_PAREN, "(") \ T(CPP_CLOSE_PAREN, ")") \ - T(CPP_GREATER, ">") /* compare */ \ - T(CPP_LESS, "<") \ - T(CPP_EQ_EQ, "==") \ + T(CPP_EQ_EQ, "==") /* compare */ \ T(CPP_NOT_EQ, "!=") \ T(CPP_GREATER_EQ, ">=") \ T(CPP_LESS_EQ, "<=") \ \ - /* The remainder of the punctuation. Order is not significant. */ \ T(CPP_PLUS_EQ, "+=") /* math */ \ T(CPP_MINUS_EQ, "-=") \ T(CPP_MULT_EQ, "*=") \ @@ -78,55 +88,67 @@ typedef struct cpp_toklist cpp_toklist; T(CPP_COMPL_EQ, "~=") \ T(CPP_RSHIFT_EQ, ">>=") \ T(CPP_LSHIFT_EQ, "<<=") \ - T(CPP_EQ, "=") /* assign */ \ + /* Digraphs together, beginning with CPP_FIRST_DIGRAPH. */ \ + T(CPP_HASH, "#") /* digraphs */ \ + T(CPP_PASTE, "##") \ + T(CPP_OPEN_SQUARE, "[") \ + T(CPP_CLOSE_SQUARE, "]") \ + T(CPP_OPEN_BRACE, "{") \ + T(CPP_CLOSE_BRACE, "}") \ + /* The remainder of the punctuation. Order is not significant. 
*/ \ + T(CPP_SEMICOLON, ";") /* structure */ \ + T(CPP_ELLIPSIS, "...") \ + T(CPP_BACKSLASH, "\\") \ T(CPP_PLUS_PLUS, "++") /* increment */ \ T(CPP_MINUS_MINUS, "--") \ T(CPP_DEREF, "->") /* accessors */ \ T(CPP_DOT, ".") \ - T(CPP_OPEN_SQUARE, "[") \ - T(CPP_CLOSE_SQUARE, "]") \ T(CPP_SCOPE, "::") \ T(CPP_DEREF_STAR, "->*") \ T(CPP_DOT_STAR, ".*") \ - T(CPP_OPEN_BRACE, "{") /* structure */ \ - T(CPP_CLOSE_BRACE, "}") \ - T(CPP_SEMICOLON, ";") \ - T(CPP_ELLIPSIS, "...") \ - T(CPP_HASH, "#") \ - T(CPP_PASTE, "##") \ - T(CPP_BACKSLASH, "\\") \ T(CPP_MIN, "<?") /* extension */ \ T(CPP_MAX, ">?") \ - T(CPP_OTHER, spell_other) /* stray punctuation */ \ + H(CPP_OTHER, spell_other) /* stray punctuation */ \ \ - T(CPP_NAME, spell_name) /* word */ \ - T(CPP_INT, 0) /* 23 */ \ - T(CPP_FLOAT, 0) /* 3.14159 */ \ - T(CPP_NUMBER, spell_name) /* 34_be+ta */ \ - T(CPP_CHAR, spell_char) /* 'char' */ \ - T(CPP_WCHAR, spell_char) /* L'char' */ \ - T(CPP_STRING, spell_string) /* "string" */ \ - T(CPP_WSTRING, spell_string) /* L"string" */ \ + H(CPP_NAME, spell_name) /* word */ \ + N(CPP_INT, 0) /* 23 */ \ + N(CPP_FLOAT, 0) /* 3.14159 */ \ + H(CPP_NUMBER, spell_name) /* 34_be+ta */ \ + H(CPP_CHAR, spell_char) /* 'char' */ \ + H(CPP_WCHAR, spell_char) /* L'char' */ \ + H(CPP_STRING, spell_string) /* "string" */ \ + H(CPP_WSTRING, spell_string) /* L"string" */ \ \ - T(CPP_COMMENT, spell_comment) /* Only if output comments. */ \ - T(CPP_VSPACE, "\n") /* End of line. */ \ - T(CPP_EOF, 0) /* End of file. */ \ - T(CPP_HEADER_NAME, 0) /* <stdio.h> in #include */ \ - T(CPP_ASSERTION, 0) /* (...) in #assert */ \ + H(CPP_C_COMMENT, spell_comment) /* Only if output comments. */ \ + H(CPP_CPP_COMMENT, spell_comment) /* Only if output comments. */ \ + H(CPP_CHILL_COMMENT, spell_comment) /* Only if output comments. */ \ + N(CPP_MACRO_ARG, 0) /* Macro argument. */ \ + N(CPP_SUBLIST, 0) /* Sublist. */ \ + E(CPP_VSPACE, "\n") /* End of line. */ \ + N(CPP_EOF, 0) /* End of file. 
*/ \ + N(CPP_HEADER_NAME, 0) /* <stdio.h> in #include */ \ + N(CPP_ASSERTION, 0) /* (...) in #assert */ \ \ /* Obsolete - will be removed when no code uses them still. */ \ - T(CPP_HSPACE, 0) /* Horizontal white space. */ \ - T(CPP_POP, 0) /* End of buffer. */ \ - T(CPP_DIRECTIVE, 0) /* #define and the like */ \ - T(CPP_MACRO, 0) /* Like a NAME, but expanded. */ + H(CPP_COMMENT, 0) /* Only if output comments. */ \ + N(CPP_HSPACE, 0) /* Horizontal white space. */ \ + N(CPP_POP, 0) /* End of buffer. */ \ + N(CPP_DIRECTIVE, 0) /* #define and the like */ \ + N(CPP_MACRO, 0) /* Like a NAME, but expanded. */ #define T(e, s) e, +#define H(e, s) e, +#define N(e, s) e, +#define E(e, s) e, enum cpp_ttype { TTYPE_TABLE N_TTYPES }; #undef T +#undef H +#undef N +#undef E /* Payload of a NAME, NUMBER, FLOAT, STRING, or COMMENT token. */ struct cpp_name @@ -135,8 +157,12 @@ struct cpp_name unsigned int offset; /* from list->namebuf */ }; -/* Per token flags. */ -#define HSPACE_BEFORE (1 << 0) /* token preceded by hspace */ +#define TOK_NAME(list, token) ((list)->namebuf + (token)->val.name.offset) + +/* Flags for the cpp_token structure. */ +#define PREV_WHITESPACE 1 /* If whitespace before this token. */ +#define DIGRAPH 2 /* If it was a digraph. */ +#define UNSIGNED_INT 4 /* If int preprocessing token unsigned. */ /* A preprocessing token. This has been carefully packed and should occupy 16 bytes on @@ -150,8 +176,9 @@ struct cpp_token unsigned char type; #endif unsigned char flags; /* flags - see above */ - unsigned int aux; /* hash of a NAME, or something - - see uses in the code */ + unsigned int aux; /* CPP_OTHER character. 
Hash of a + NAME, or something - see uses + in the code */ union { struct cpp_name name; /* a string */ @@ -168,7 +195,7 @@ typedef int (*parse_cleanup_t) PARAMS ((cpp_buffer *, cpp_reader *)); struct cpp_toklist { - struct cpp_token *tokens; /* actual tokens as an array */ + cpp_token *tokens; /* actual tokens as an array */ unsigned int tokens_used; /* tokens used */ unsigned int tokens_cap; /* tokens allocated */ @@ -178,6 +205,11 @@ struct cpp_toklist unsigned int line; /* starting line number */ + /* Comment copying. */ + cpp_token *comments; /* comment tokens. */ + unsigned int comments_used; /* comment tokens used. */ + unsigned int comments_cap; /* comment token capacity. */ + /* Only used if tokens[0].type == CPP_DIRECTIVE. This is the handler to call after lexing the rest of this line. The flags indicate whether the rest of the line gets special treatment @@ -244,8 +276,12 @@ struct cpp_buffer /* True if we have already warned about C++ comments in this file. The warning happens only for C89 extended mode with -pedantic on, - and only once per file (otherwise it would be far too noisy). */ + or for -Wtraditional, and only once per file (otherwise it would + be far too noisy). */ char warned_cplusplus_comments; + + /* True if this buffer's data is mmapped. */ + char mapped; }; struct file_name_map_list; @@ -561,6 +597,7 @@ struct cpp_printer /* Name under which this program was invoked. */ extern const char *progname; +extern void _cpp_lex_file PARAMS((cpp_reader *)); extern int cpp_handle_options PARAMS ((cpp_reader *, int, char **)); extern enum cpp_ttype cpp_get_token PARAMS ((cpp_reader *)); extern enum cpp_ttype cpp_get_non_space_token PARAMS ((cpp_reader *)); @@ -580,6 +617,8 @@ extern void cpp_assert PARAMS ((cpp_reader *, const char *)); extern void cpp_undef PARAMS ((cpp_reader *, const char *)); extern void cpp_unassert PARAMS ((cpp_reader *, const char *)); +extern void cpp_free_token_list PARAMS ((cpp_toklist *)); + /* N.B. 
The error-message-printer prototypes have not been nicely formatted because exgettext needs to see 'msgid' on the same line as the name of the function in order to work properly. Only the |