diff options
Diffstat (limited to 'gcc/cpplex.c')
-rw-r--r-- | gcc/cpplex.c | 1620 |
1 files changed, 1620 insertions, 0 deletions
diff --git a/gcc/cpplex.c b/gcc/cpplex.c new file mode 100644 index 0000000..674db90 --- /dev/null +++ b/gcc/cpplex.c @@ -0,0 +1,1620 @@ +/* CPP Library - lexical analysis. + Copyright (C) 2000 Free Software Foundation, Inc. + Contributed by Per Bothner, 1994-95. + Based on CCCP program by Paul Rubin, June 1986 + Adapted to ANSI C, Richard Stallman, Jan 1987 + Broken out to separate file, Zack Weinberg, Mar 2000 + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +#include "config.h" +#include "system.h" +#include "intl.h" +#include "cpplib.h" +#include "cpphash.h" + +#define PEEKN(N) (CPP_BUFFER (pfile)->rlimit - CPP_BUFFER (pfile)->cur >= (N) \ + ? CPP_BUFFER (pfile)->cur[N] : EOF) +#define FORWARD(N) CPP_FORWARD (CPP_BUFFER (pfile), (N)) +#define GETC() CPP_BUF_GET (CPP_BUFFER (pfile)) +#define PEEKC() CPP_BUF_PEEK (CPP_BUFFER (pfile)) + +static void skip_block_comment PARAMS ((cpp_reader *)); +static void skip_line_comment PARAMS ((cpp_reader *)); +static int maybe_macroexpand PARAMS ((cpp_reader *, long)); +static int skip_comment PARAMS ((cpp_reader *, int)); +static int copy_comment PARAMS ((cpp_reader *, int)); +static void skip_string PARAMS ((cpp_reader *, int)); +static void parse_string PARAMS ((cpp_reader *, int)); +static U_CHAR *find_position PARAMS ((U_CHAR *, U_CHAR *, unsigned long *)); +static int null_cleanup PARAMS ((cpp_buffer *, cpp_reader *)); + +/* Re-allocates PFILE->token_buffer so it will hold at least N more chars. */ + +void +_cpp_grow_token_buffer (pfile, n) + cpp_reader *pfile; + long n; +{ + long old_written = CPP_WRITTEN (pfile); + pfile->token_buffer_size = n + 2 * pfile->token_buffer_size; + pfile->token_buffer = (U_CHAR *) + xrealloc(pfile->token_buffer, pfile->token_buffer_size); + CPP_SET_WRITTEN (pfile, old_written); +} + +static int +null_cleanup (pbuf, pfile) + cpp_buffer *pbuf ATTRIBUTE_UNUSED; + cpp_reader *pfile ATTRIBUTE_UNUSED; +{ + return 0; +} + +/* Allocate a new cpp_buffer for PFILE, and push it on the input buffer stack. + If BUFFER != NULL, then use the LENGTH characters in BUFFER + as the new input buffer. + Return the new buffer, or NULL on failure. */ + +cpp_buffer * +cpp_push_buffer (pfile, buffer, length) + cpp_reader *pfile; + const U_CHAR *buffer; + long length; +{ + cpp_buffer *buf = CPP_BUFFER (pfile); + cpp_buffer *new; + if (++pfile->buffer_stack_depth == CPP_STACK_MAX) + { + cpp_fatal (pfile, "macro or `#include' recursion too deep"); + return NULL; + } + + new = (cpp_buffer *) xcalloc (1, sizeof (cpp_buffer)); + + new->if_stack = pfile->if_stack; + new->cleanup = null_cleanup; + new->buf = new->cur = buffer; + new->alimit = new->rlimit = buffer + length; + new->prev = buf; + new->mark = -1; + new->line_base = NULL; + + CPP_BUFFER (pfile) = new; + return new; +} + +cpp_buffer * +cpp_pop_buffer (pfile) + cpp_reader *pfile; +{ + cpp_buffer *buf = CPP_BUFFER (pfile); + if (ACTIVE_MARK_P (pfile)) + cpp_ice (pfile, "mark active in cpp_pop_buffer"); + (*buf->cleanup) (buf, pfile); + CPP_BUFFER (pfile) = CPP_PREV_BUFFER (buf); + free (buf); + pfile->buffer_stack_depth--; + return CPP_BUFFER (pfile); +} + +/* Scan until CPP_BUFFER (PFILE) is exhausted into PFILE->token_buffer. + Pop the buffer when done. */ + +void +cpp_scan_buffer (pfile) + cpp_reader *pfile; +{ + cpp_buffer *buffer = CPP_BUFFER (pfile); + enum cpp_token token; + if (CPP_OPTIONS (pfile)->no_output) + { + long old_written = CPP_WRITTEN (pfile); + /* In no-output mode, we can ignore everything but directives. */ + for (;;) + { + if (! pfile->only_seen_white) + _cpp_skip_rest_of_line (pfile); + token = cpp_get_token (pfile); + if (token == CPP_EOF) /* Should not happen ... */ + break; + if (token == CPP_POP && CPP_BUFFER (pfile) == buffer) + { + if (CPP_PREV_BUFFER (CPP_BUFFER (pfile)) != NULL) + cpp_pop_buffer (pfile); + break; + } + } + CPP_SET_WRITTEN (pfile, old_written); + } + else + { + for (;;) + { + token = cpp_get_token (pfile); + if (token == CPP_EOF) /* Should not happen ... */ + break; + if (token == CPP_POP && CPP_BUFFER (pfile) == buffer) + { + if (CPP_PREV_BUFFER (CPP_BUFFER (pfile)) != NULL) + cpp_pop_buffer (pfile); + break; + } + } + } +} + +/* + * Rescan a string (which may have escape marks) into pfile's buffer. + * Place the result in pfile->token_buffer. + * + * The input is copied before it is scanned, so it is safe to pass + * it something from the token_buffer that will get overwritten + * (because it follows CPP_WRITTEN). This is used by do_include. + */ + +void +cpp_expand_to_buffer (pfile, buf, length) + cpp_reader *pfile; + const U_CHAR *buf; + int length; +{ + register cpp_buffer *ip; + U_CHAR *buf1; + int save_no_output; + + if (length < 0) + { + cpp_ice (pfile, "length < 0 in cpp_expand_to_buffer"); + return; + } + + /* Set up the input on the input stack. */ + + buf1 = (U_CHAR *) alloca (length + 1); + memcpy (buf1, buf, length); + buf1[length] = 0; + + ip = cpp_push_buffer (pfile, buf1, length); + if (ip == NULL) + return; + ip->has_escapes = 1; + + /* Scan the input, create the output. */ + save_no_output = CPP_OPTIONS (pfile)->no_output; + CPP_OPTIONS (pfile)->no_output = 0; + CPP_OPTIONS (pfile)->no_line_commands++; + cpp_scan_buffer (pfile); + CPP_OPTIONS (pfile)->no_line_commands--; + CPP_OPTIONS (pfile)->no_output = save_no_output; + + CPP_NUL_TERMINATE (pfile); +} + +void +cpp_buf_line_and_col (pbuf, linep, colp) + register cpp_buffer *pbuf; + long *linep, *colp; +{ + if (pbuf) + { + *linep = pbuf->lineno; + if (colp) + *colp = pbuf->cur - pbuf->line_base; + } + else + { + *linep = 0; + if (colp) + *colp = 0; + } +} + +/* Return the topmost cpp_buffer that corresponds to a file (not a macro). */ + +cpp_buffer * +cpp_file_buffer (pfile) + cpp_reader *pfile; +{ + cpp_buffer *ip; + + for (ip = CPP_BUFFER (pfile); ip; ip = CPP_PREV_BUFFER (ip)) + if (ip->ihash != NULL) + return ip; + return NULL; +} + +/* Skip a C-style block comment. We know it's a comment, and point is + at the second character of the starter. */ +static void +skip_block_comment (pfile) + cpp_reader *pfile; +{ + int c, prev_c = -1; + long line, col; + + FORWARD(1); + cpp_buf_line_and_col (CPP_BUFFER (pfile), &line, &col); + for (;;) + { + c = GETC (); + if (c == EOF) + { + cpp_error_with_line (pfile, line, col, "unterminated comment"); + return; + } + else if (c == '\n' || c == '\r') + { + /* \r cannot be a macro escape marker here. */ + if (!ACTIVE_MARK_P (pfile)) + CPP_BUMP_LINE (pfile); + } + else if (c == '/' && prev_c == '*') + return; + else if (c == '*' && prev_c == '/' + && CPP_OPTIONS (pfile)->warn_comments) + cpp_warning (pfile, "`/*' within comment"); + + prev_c = c; + } +} + +/* Skip a C++/Chill line comment. We know it's a comment, and point + is at the second character of the initiator. */ +static void +skip_line_comment (pfile) + cpp_reader *pfile; +{ + FORWARD(1); + for (;;) + { + int c = GETC (); + + /* We don't have to worry about EOF in here. */ + if (c == '\n') + { + /* Don't consider final '\n' to be part of comment. */ + FORWARD(-1); + return; + } + else if (c == '\r') + { + /* \r cannot be a macro escape marker here. */ + if (!ACTIVE_MARK_P (pfile)) + CPP_BUMP_LINE (pfile); + if (CPP_OPTIONS (pfile)->warn_comments) + cpp_warning (pfile, "backslash-newline within line comment"); + } + } +} + +/* Skip a comment - C, C++, or Chill style. M is the first character + of the comment marker. If this really is a comment, skip to its + end and return ' '. If this is not a comment, return M (which will + be '/' or '-'). */ + +static int +skip_comment (pfile, m) + cpp_reader *pfile; + int m; +{ + if (m == '/' && PEEKC() == '*') + { + skip_block_comment (pfile); + return ' '; + } + else if (m == '/' && PEEKC() == '/') + { + if (CPP_BUFFER (pfile)->system_header_p) + { + /* We silently allow C++ comments in system headers, irrespective + of conformance mode, because lots of busted systems do that + and trying to clean it up in fixincludes is a nightmare. */ + skip_line_comment (pfile); + return ' '; + } + else if (CPP_OPTIONS (pfile)->cplusplus_comments) + { + if (CPP_OPTIONS (pfile)->c89 + && CPP_PEDANTIC (pfile) + && ! CPP_BUFFER (pfile)->warned_cplusplus_comments) + { + cpp_pedwarn (pfile, + "C++ style comments are not allowed in ISO C89"); + cpp_pedwarn (pfile, + "(this will be reported only once per input file)"); + CPP_BUFFER (pfile)->warned_cplusplus_comments = 1; + } + skip_line_comment (pfile); + return ' '; + } + else + return m; + } + else if (m == '-' && PEEKC() == '-' + && CPP_OPTIONS (pfile)->chill) + { + skip_line_comment (pfile); + return ' '; + } + else + return m; +} + +/* Identical to skip_comment except that it copies the comment into the + token_buffer. This is used if !discard_comments. */ +static int +copy_comment (pfile, m) + cpp_reader *pfile; + int m; +{ + const U_CHAR *start = CPP_BUFFER (pfile)->cur; /* XXX Layering violation */ + const U_CHAR *limit; + + if (skip_comment (pfile, m) == m) + return m; + + limit = CPP_BUFFER (pfile)->cur; + CPP_RESERVE (pfile, limit - start + 2); + CPP_PUTC_Q (pfile, m); + for (; start <= limit; start++) + if (*start != '\r') + CPP_PUTC_Q (pfile, *start); + + return ' '; +} + +/* Skip whitespace \-newline and comments. Does not macro-expand. */ + +void +_cpp_skip_hspace (pfile) + cpp_reader *pfile; +{ + int c; + while (1) + { + c = GETC(); + if (c == EOF) + return; + else if (is_hspace(c)) + { + if ((c == '\f' || c == '\v') && CPP_PEDANTIC (pfile)) + cpp_pedwarn (pfile, "%s in preprocessing directive", + c == '\f' ? "formfeed" : "vertical tab"); + } + else if (c == '\r') + { + /* \r is a backslash-newline marker if !has_escapes, and + a deletable-whitespace or no-reexpansion marker otherwise. */ + if (CPP_BUFFER (pfile)->has_escapes) + { + if (PEEKC() == ' ') + FORWARD(1); + else + break; + } + else + CPP_BUMP_LINE (pfile); + } + else if (c == '/' || c == '-') + { + c = skip_comment (pfile, c); + if (c != ' ') + break; + } + else + break; + } + FORWARD(-1); +} + +/* Read and discard the rest of the current line. */ + +void +_cpp_skip_rest_of_line (pfile) + cpp_reader *pfile; +{ + for (;;) + { + int c = GETC(); + switch (c) + { + case '\n': + FORWARD(-1); + case EOF: + return; + + case '\r': + if (! CPP_BUFFER (pfile)->has_escapes) + CPP_BUMP_LINE (pfile); + break; + + case '\'': + case '\"': + skip_string (pfile, c); + break; + + case '/': + case '-': + skip_comment (pfile, c); + break; + + case '\f': + case '\v': + if (CPP_PEDANTIC (pfile)) + cpp_pedwarn (pfile, "%s in preprocessing directive", + c == '\f' ? "formfeed" : "vertical tab"); + break; + + } + } +} + +/* Parse an identifier starting with C. */ + +void +_cpp_parse_name (pfile, c) + cpp_reader *pfile; + int c; +{ + for (;;) + { + if (! is_idchar(c)) + { + FORWARD (-1); + break; + } + + if (c == '$' && CPP_PEDANTIC (pfile)) + cpp_pedwarn (pfile, "`$' in identifier"); + + CPP_RESERVE(pfile, 2); /* One more for final NUL. */ + CPP_PUTC_Q (pfile, c); + c = GETC(); + if (c == EOF) + break; + } + CPP_NUL_TERMINATE_Q (pfile); + return; +} + +/* Parse and skip over a string starting with C. A single quoted + string is treated like a double -- some programs (e.g., troff) are + perverse this way. (However, a single quoted string is not allowed + to extend over multiple lines.) */ +static void +skip_string (pfile, c) + cpp_reader *pfile; + int c; +{ + long start_line, start_column; + cpp_buf_line_and_col (cpp_file_buffer (pfile), &start_line, &start_column); + + while (1) + { + int cc = GETC(); + switch (cc) + { + case EOF: + cpp_error_with_line (pfile, start_line, start_column, + "unterminated string or character constant"); + if (pfile->multiline_string_line != start_line + && pfile->multiline_string_line != 0) + cpp_error_with_line (pfile, + pfile->multiline_string_line, -1, + "possible real start of unterminated constant"); + pfile->multiline_string_line = 0; + return; + + case '\n': + CPP_BUMP_LINE (pfile); + /* In Fortran and assembly language, silently terminate + strings of either variety at end of line. This is a + kludge around not knowing where comments are in these + languages. */ + if (CPP_OPTIONS (pfile)->lang_fortran + || CPP_OPTIONS (pfile)->lang_asm) + { + FORWARD(-1); + return; + } + /* Character constants may not extend over multiple lines. + In Standard C, neither may strings. We accept multiline + strings as an extension. */ + if (c == '\'') + { + cpp_error_with_line (pfile, start_line, start_column, + "unterminated character constant"); + FORWARD(-1); + return; + } + if (CPP_PEDANTIC (pfile) && pfile->multiline_string_line == 0) + cpp_pedwarn_with_line (pfile, start_line, start_column, + "string constant runs past end of line"); + if (pfile->multiline_string_line == 0) + pfile->multiline_string_line = start_line; + break; + + case '\r': + if (CPP_BUFFER (pfile)->has_escapes) + { + cpp_ice (pfile, "\\r escape inside string constant"); + FORWARD(1); + } + else + /* Backslash newline is replaced by nothing at all. */ + CPP_BUMP_LINE (pfile); + break; + + case '\\': + FORWARD(1); + break; + + case '\"': + case '\'': + if (cc == c) + return; + break; + } + } +} + +/* Parse a string and copy it to the output. */ + +static void +parse_string (pfile, c) + cpp_reader *pfile; + int c; +{ + const U_CHAR *start = CPP_BUFFER (pfile)->cur; /* XXX Layering violation */ + const U_CHAR *limit; + + skip_string (pfile, c); + + limit = CPP_BUFFER (pfile)->cur; + CPP_RESERVE (pfile, limit - start + 2); + CPP_PUTC_Q (pfile, c); + for (; start < limit; start++) + if (*start != '\r') + CPP_PUTC_Q (pfile, *start); +} + +/* Read an assertion into the token buffer, converting to + canonical form: `#predicate(a n swe r)' The next non-whitespace + character to read should be the first letter of the predicate. + Returns 0 for syntax error, 1 for bare predicate, 2 for predicate + with answer (see callers for why). In case of 0, an error has been + printed. */ +int +_cpp_parse_assertion (pfile) + cpp_reader *pfile; +{ + int c, dropwhite; + _cpp_skip_hspace (pfile); + c = PEEKC(); + if (! is_idstart(c)) + { + cpp_error (pfile, "assertion predicate is not an identifier"); + return 0; + } + CPP_PUTC(pfile, '#'); + FORWARD(1); + _cpp_parse_name (pfile, c); + + c = PEEKC(); + if (c != '(') + { + if (is_hspace(c) || c == '\r') + _cpp_skip_hspace (pfile); + c = PEEKC(); + } + if (c != '(') + return 1; + + CPP_PUTC(pfile, '('); + FORWARD(1); + dropwhite = 1; + while ((c = GETC()) != ')') + { + if (is_space(c)) + { + if (! dropwhite) + { + CPP_PUTC(pfile, ' '); + dropwhite = 1; + } + } + else if (c == '\n' || c == EOF) + { + if (c == '\n') FORWARD(-1); + cpp_error (pfile, "un-terminated assertion answer"); + return 0; + } + else if (c == '\r') + /* \r cannot be a macro escape here. */ + CPP_BUMP_LINE (pfile); + else + { + CPP_PUTC (pfile, c); + dropwhite = 0; + } + } + + if (pfile->limit[-1] == ' ') + pfile->limit[-1] = ')'; + else if (pfile->limit[-1] == '(') + { + cpp_error (pfile, "empty token sequence in assertion"); + return 0; + } + else + CPP_PUTC (pfile, ')'); + + CPP_NUL_TERMINATE (pfile); + return 2; +} + +/* Get the next token, and add it to the text in pfile->token_buffer. + Return the kind of token we got. */ + +enum cpp_token +_cpp_lex_token (pfile) + cpp_reader *pfile; +{ + register int c, c2, c3; + enum cpp_token token; + struct cpp_options *opts = CPP_OPTIONS (pfile); + + get_next: + c = GETC(); + switch (c) + { + case EOF: + return CPP_EOF; + + case '/': + if (PEEKC () == '=') + goto op2; + + comment: + if (opts->discard_comments) + c = skip_comment (pfile, c); + else + c = copy_comment (pfile, c); + if (c != ' ') + goto randomchar; + + /* Comments are equivalent to spaces. + For -traditional, a comment is equivalent to nothing. */ + if (opts->traditional || !opts->discard_comments) + return CPP_COMMENT; + else + { + CPP_PUTC (pfile, c); + return CPP_HSPACE; + } + + case '#': + if (pfile->parsing_if_directive) + { + _cpp_skip_hspace (pfile); + _cpp_parse_assertion (pfile); + return CPP_ASSERTION; + } + + if (pfile->parsing_define_directive && ! CPP_TRADITIONAL (pfile)) + { + CPP_RESERVE (pfile, 3); + CPP_PUTC_Q (pfile, '#'); + CPP_NUL_TERMINATE_Q (pfile); + if (PEEKC () != '#') + return CPP_STRINGIZE; + + FORWARD (1); + CPP_PUTC_Q (pfile, '#'); + CPP_NUL_TERMINATE_Q (pfile); + return CPP_TOKPASTE; + } + + if (!pfile->only_seen_white) + goto randomchar; + /* -traditional directives are recognized only with the # in + column 1. + XXX Layering violation. */ + if (CPP_TRADITIONAL (pfile) + && CPP_BUFFER (pfile)->cur - CPP_BUFFER (pfile)->line_base != 1) + goto randomchar; + return CPP_DIRECTIVE; + + case '\"': + case '\'': + parse_string (pfile, c); + pfile->only_seen_white = 0; + return c == '\'' ? CPP_CHAR : CPP_STRING; + + case '$': + if (!opts->dollars_in_ident) + goto randomchar; + goto letter; + + case ':': + if (opts->cplusplus && PEEKC () == ':') + goto op2; + goto randomchar; + + case '&': + case '+': + case '|': + c2 = PEEKC (); + if (c2 == c || c2 == '=') + goto op2; + goto randomchar; + + case '*': + case '!': + case '%': + case '=': + case '^': + if (PEEKC () == '=') + goto op2; + goto randomchar; + + case '-': + c2 = PEEKC (); + if (c2 == '-') + { + if (opts->chill) + goto comment; /* Chill style comment */ + else + goto op2; + } + else if (c2 == '=') + goto op2; + else if (c2 == '>') + { + if (opts->cplusplus && PEEKN (1) == '*') + { + /* In C++, there's a ->* operator. */ + token = CPP_OTHER; + pfile->only_seen_white = 0; + CPP_RESERVE (pfile, 4); + CPP_PUTC_Q (pfile, c); + CPP_PUTC_Q (pfile, GETC ()); + CPP_PUTC_Q (pfile, GETC ()); + CPP_NUL_TERMINATE_Q (pfile); + return token; + } + goto op2; + } + goto randomchar; + + case '<': + if (pfile->parsing_include_directive) + { + for (;;) + { + CPP_PUTC (pfile, c); + if (c == '>') + break; + c = GETC (); + if (c == '\n' || c == EOF) + { + cpp_error (pfile, + "missing '>' in `#include <FILENAME>'"); + break; + } + else if (c == '\r') + { + if (!CPP_BUFFER (pfile)->has_escapes) + { + /* Backslash newline is replaced by nothing. */ + CPP_ADJUST_WRITTEN (pfile, -1); + CPP_BUMP_LINE (pfile); + } + else + { + /* We might conceivably get \r- or \r<space> in + here. Just delete 'em. */ + int d = GETC(); + if (d != '-' && d != ' ') + cpp_ice (pfile, "unrecognized escape \\r%c", d); + CPP_ADJUST_WRITTEN (pfile, -1); + } + } + } + return CPP_STRING; + } + /* else fall through */ + case '>': + c2 = PEEKC (); + if (c2 == '=') + goto op2; + /* GNU C++ supports MIN and MAX operators <? and >?. */ + if (c2 != c && (!opts->cplusplus || c2 != '?')) + goto randomchar; + FORWARD(1); + CPP_RESERVE (pfile, 4); + CPP_PUTC (pfile, c); + CPP_PUTC (pfile, c2); + c3 = PEEKC (); + if (c3 == '=') + CPP_PUTC_Q (pfile, GETC ()); + CPP_NUL_TERMINATE_Q (pfile); + pfile->only_seen_white = 0; + return CPP_OTHER; + + case '.': + c2 = PEEKC (); + if (ISDIGIT(c2)) + { + CPP_RESERVE(pfile, 2); + CPP_PUTC_Q (pfile, '.'); + c = GETC (); + goto number; + } + + /* In C++ there's a .* operator. */ + if (opts->cplusplus && c2 == '*') + goto op2; + + if (c2 == '.' && PEEKN(1) == '.') + { + CPP_RESERVE(pfile, 4); + CPP_PUTC_Q (pfile, '.'); + CPP_PUTC_Q (pfile, '.'); + CPP_PUTC_Q (pfile, '.'); + FORWARD (2); + CPP_NUL_TERMINATE_Q (pfile); + pfile->only_seen_white = 0; + return CPP_3DOTS; + } + goto randomchar; + + op2: + token = CPP_OTHER; + pfile->only_seen_white = 0; + CPP_RESERVE(pfile, 3); + CPP_PUTC_Q (pfile, c); + CPP_PUTC_Q (pfile, GETC ()); + CPP_NUL_TERMINATE_Q (pfile); + return token; + + case 'L': + c2 = PEEKC (); + if ((c2 == '\'' || c2 == '\"') && !CPP_TRADITIONAL (pfile)) + { + CPP_PUTC (pfile, c); + c = GETC (); + parse_string (pfile, c); + pfile->only_seen_white = 0; + return c == '\'' ? CPP_WCHAR : CPP_WSTRING; + } + goto letter; + + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + number: + c2 = '.'; + for (;;) + { + CPP_RESERVE (pfile, 2); + CPP_PUTC_Q (pfile, c); + c = PEEKC (); + if (c == EOF) + break; + if (!is_numchar(c) && c != '.' + && ((c2 != 'e' && c2 != 'E' + && ((c2 != 'p' && c2 != 'P') + || CPP_OPTIONS (pfile)->c89)) + || (c != '+' && c != '-'))) + break; + FORWARD(1); + c2= c; + } + CPP_NUL_TERMINATE_Q (pfile); + pfile->only_seen_white = 0; + return CPP_NUMBER; + case 'b': case 'c': case 'd': case 'h': case 'o': + case 'B': case 'C': case 'D': case 'H': case 'O': + if (opts->chill && PEEKC () == '\'') + { + pfile->only_seen_white = 0; + CPP_RESERVE (pfile, 2); + CPP_PUTC_Q (pfile, c); + CPP_PUTC_Q (pfile, '\''); + FORWARD(1); + for (;;) + { + c = GETC(); + if (c == EOF) + goto chill_number_eof; + if (!is_numchar(c)) + break; + CPP_PUTC (pfile, c); + } + if (c == '\'') + { + CPP_RESERVE (pfile, 2); + CPP_PUTC_Q (pfile, c); + CPP_NUL_TERMINATE_Q (pfile); + return CPP_STRING; + } + else + { + FORWARD(-1); + chill_number_eof: + CPP_NUL_TERMINATE (pfile); + return CPP_NUMBER; + } + } + else + goto letter; + case '_': + case 'a': case 'e': case 'f': case 'g': case 'i': case 'j': + case 'k': case 'l': case 'm': case 'n': case 'p': case 'q': + case 'r': case 's': case 't': case 'u': case 'v': case 'w': + case 'x': case 'y': case 'z': + case 'A': case 'E': case 'F': case 'G': case 'I': case 'J': + case 'K': case 'M': case 'N': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': + letter: + pfile->only_seen_white = 0; + _cpp_parse_name (pfile, c); + return CPP_MACRO; + + case ' ': case '\t': case '\v': + for (;;) + { + CPP_PUTC (pfile, c); + c = PEEKC (); + if (c == EOF || !is_hspace(c)) + break; + FORWARD(1); + } + return CPP_HSPACE; + + case '\r': + if (CPP_BUFFER (pfile)->has_escapes) + { + c = GETC (); + if (c == '-') + { + if (pfile->output_escapes) + CPP_PUTS (pfile, "\r-", 2); + _cpp_parse_name (pfile, GETC ()); + return CPP_NAME; + } + else if (c == ' ') + { + CPP_RESERVE (pfile, 2); + if (pfile->output_escapes) + CPP_PUTC_Q (pfile, '\r'); + CPP_PUTC_Q (pfile, c); + return CPP_HSPACE; + } + else + { + cpp_ice (pfile, "unrecognized escape \\r%c", c); + goto get_next; + } + } + else + { + /* Backslash newline is ignored. */ + CPP_BUMP_LINE (pfile); + goto get_next; + } + + case '\n': + CPP_PUTC (pfile, c); + if (pfile->only_seen_white == 0) + pfile->only_seen_white = 1; + CPP_BUMP_LINE (pfile); + if (! CPP_OPTIONS (pfile)->no_line_commands) + { + pfile->lineno++; + if (CPP_BUFFER (pfile)->lineno != pfile->lineno) + _cpp_output_line_command (pfile, same_file); + } + return CPP_VSPACE; + + case '(': token = CPP_LPAREN; goto char1; + case ')': token = CPP_RPAREN; goto char1; + case '{': token = CPP_LBRACE; goto char1; + case '}': token = CPP_RBRACE; goto char1; + case ',': token = CPP_COMMA; goto char1; + case ';': token = CPP_SEMICOLON; goto char1; + + randomchar: + default: + token = CPP_OTHER; + char1: + pfile->only_seen_white = 0; + CPP_PUTC (pfile, c); + return token; + } +} + +/* Check for and expand a macro, which is from WRITTEN to CPP_WRITTEN (pfile). + Caller is expected to have checked no_macro_expand. */ +static int +maybe_macroexpand (pfile, written) + cpp_reader *pfile; + long written; +{ + U_CHAR *macro = pfile->token_buffer + written; + size_t len = CPP_WRITTEN (pfile) - written; + HASHNODE *hp = _cpp_lookup (pfile, macro, len); + + if (!hp) + return 0; + if (hp->type == T_DISABLED) + { + if (pfile->output_escapes) + { + /* Insert a no-reexpand marker before IDENT. */ + CPP_RESERVE (pfile, 2); + CPP_ADJUST_WRITTEN (pfile, 2); + macro = pfile->token_buffer + written; + + memmove (macro + 2, macro, len); + macro[0] = '\r'; + macro[1] = '-'; + } + return 0; + } + + /* If macro wants an arglist, verify that a '(' follows. */ + if (hp->type == T_MACRO && hp->value.defn->nargs >= 0) + { + int macbuf_whitespace = 0; + int c; + + while (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile))) + { + const U_CHAR *point = CPP_BUFFER (pfile)->cur; + for (;;) + { + _cpp_skip_hspace (pfile); + c = PEEKC (); + if (c == '\n') + FORWARD(1); + else + break; + } + if (point != CPP_BUFFER (pfile)->cur) + macbuf_whitespace = 1; + if (c == '(') + goto is_macro_call; + else if (c != EOF) + goto not_macro_call; + cpp_pop_buffer (pfile); + } + + CPP_SET_MARK (pfile); + for (;;) + { + _cpp_skip_hspace (pfile); + c = PEEKC (); + if (c == '\n') + FORWARD(1); + else + break; + } + CPP_GOTO_MARK (pfile); + + if (c != '(') + { + not_macro_call: + if (macbuf_whitespace) + CPP_PUTC (pfile, ' '); + return 0; + } + } + + is_macro_call: + /* This is now known to be a macro call. + Expand the macro, reading arguments as needed, + and push the expansion on the input stack. */ + _cpp_macroexpand (pfile, hp); + CPP_SET_WRITTEN (pfile, written); + return 1; +} + +enum cpp_token +cpp_get_token (pfile) + cpp_reader *pfile; +{ + enum cpp_token token; + long written = CPP_WRITTEN (pfile); + + get_next: + token = _cpp_lex_token (pfile); + + switch (token) + { + default: + return token; + + case CPP_DIRECTIVE: + if (_cpp_handle_directive (pfile)) + return CPP_DIRECTIVE; + pfile->only_seen_white = 0; + CPP_PUTC (pfile, '#'); + return CPP_OTHER; + + case CPP_MACRO: + if (! pfile->no_macro_expand + && maybe_macroexpand (pfile, written)) + goto get_next; + return CPP_NAME; + + case CPP_EOF: + if (CPP_BUFFER (pfile)->manual_pop) + /* If we've been reading from redirected input, the + frontend will pop the buffer. */ + return CPP_EOF; + else if (CPP_BUFFER (pfile)->seen_eof) + { + if (CPP_PREV_BUFFER (CPP_BUFFER (pfile)) == NULL) + return CPP_EOF; + + cpp_pop_buffer (pfile); + goto get_next; + } + else + { + _cpp_handle_eof (pfile); + return CPP_POP; + } + } +} + +/* Like cpp_get_token, but skip spaces and comments. */ + +enum cpp_token +cpp_get_non_space_token (pfile) + cpp_reader *pfile; +{ + int old_written = CPP_WRITTEN (pfile); + for (;;) + { + enum cpp_token token = cpp_get_token (pfile); + if (token != CPP_COMMENT && token != CPP_POP + && token != CPP_HSPACE && token != CPP_VSPACE) + return token; + CPP_SET_WRITTEN (pfile, old_written); + } +} + +/* Like cpp_get_token, except that it does not read past end-of-line. + Also, horizontal space is skipped, and macros are popped. */ + +enum cpp_token +_cpp_get_directive_token (pfile) + cpp_reader *pfile; +{ + long old_written = CPP_WRITTEN (pfile); + enum cpp_token token; + + for (;;) + { + _cpp_skip_hspace (pfile); + if (PEEKC () == '\n') + return CPP_VSPACE; + + token = cpp_get_token (pfile); + /* token could be hspace at the beginning of a macro. */ + if (token == CPP_HSPACE || token == CPP_COMMENT) + { + CPP_SET_WRITTEN (pfile, old_written); + continue; + } + + /* token cannot be vspace, it would have been caught above. */ + if (token == CPP_VSPACE) + { + cpp_ice (pfile, "VSPACE in get_directive_token"); + return token; + } + + /* token cannot be POP unless the buffer is a macro buffer. */ + if (token != CPP_POP) + return token; + + if (! CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile))) + { + cpp_ice (pfile, "POP of file buffer in get_directive_token"); + return token; + } + + /* We must pop the buffer by hand, or else cpp_get_token might + hand us white space or newline on the next invocation. */ + cpp_pop_buffer (pfile); + } +} + +/* Determine the current line and column. Used only by read_and_prescan. */ +static U_CHAR * +find_position (start, limit, linep) + U_CHAR *start; + U_CHAR *limit; + unsigned long *linep; +{ + unsigned long line = *linep; + U_CHAR *lbase = start; + while (start < limit) + { + U_CHAR ch = *start++; + if (ch == '\n' || ch == '\r') + { + line++; + lbase = start; + } + } + *linep = line; + return lbase; +} + +/* Read the entire contents of file DESC into buffer BUF. LEN is how + much memory to allocate initially; more will be allocated if + necessary. Convert end-of-line markers (\n, \r, \r\n, \n\r) to + canonical form (\n). If enabled, convert and/or warn about + trigraphs. Convert backslash-newline to a one-character escape + (\r) and remove it from "embarrassing" places (i.e. the middle of a + token). If there is no newline at the end of the file, add one and + warn. Returns -1 on failure, or the actual length of the data to + be scanned. + + This function does a lot of work, and can be a serious performance + bottleneck. It has been tuned heavily; make sure you understand it + before hacking. The common case - no trigraphs, Unix style line + breaks, backslash-newline set off by whitespace, newline at EOF - + has been optimized at the expense of the others. The performance + penalty for DOS style line breaks (\r\n) is about 15%. + + Warnings lose particularly heavily since we have to determine the + line number, which involves scanning from the beginning of the file + or from the last warning. The penalty for the absence of a newline + at the end of reload1.c is about 60%. (reload1.c is 329k.) + + If your file has more than one kind of end-of-line marker, you + will get messed-up line numbering. */ + +/* Table of characters that can't be handled in the inner loop. + Keep these contiguous to optimize the performance of the code generated + for the switch that uses them. */ +#define SPECCASE_EMPTY 0 +#define SPECCASE_NUL 1 +#define SPECCASE_CR 2 +#define SPECCASE_BACKSLASH 3 +#define SPECCASE_QUESTION 4 + +long +_cpp_read_and_prescan (pfile, fp, desc, len) + cpp_reader *pfile; + cpp_buffer *fp; + int desc; + size_t len; +{ + U_CHAR *buf = (U_CHAR *) xmalloc (len); + U_CHAR *ip, *op, *line_base; + U_CHAR *ibase; + U_CHAR *speccase = pfile->input_speccase; + unsigned long line; + unsigned int deferred_newlines; + int count; + size_t offset; + + offset = 0; + op = buf; + line_base = buf; + line = 1; + ibase = pfile->input_buffer + 2; + deferred_newlines = 0; + + for (;;) + { + read_next: + + count = read (desc, pfile->input_buffer + 2, pfile->input_buffer_len); + if (count < 0) + goto error; + else if (count == 0) + break; + + offset += count; + ip = ibase; + ibase = pfile->input_buffer + 2; + ibase[count] = ibase[count+1] = '\0'; + + if (offset > len) + { + size_t delta_op; + size_t delta_line_base; + len *= 2; + if (offset > len) + /* len overflowed. + This could happen if the file is larger than half the + maximum address space of the machine. */ + goto too_big; + + delta_op = op - buf; + delta_line_base = line_base - buf; + buf = (U_CHAR *) xrealloc (buf, len); + op = buf + delta_op; + line_base = buf + delta_line_base; + } + + for (;;) + { + unsigned int span = 0; + + /* Deal with \-newline in the middle of a token. */ + if (deferred_newlines) + { + while (speccase[ip[span]] == SPECCASE_EMPTY + && ip[span] != '\n' + && ip[span] != '\t' + && ip[span] != ' ') + span++; + memcpy (op, ip, span); + op += span; + ip += span; + /* If ip[0] is SPECCASE_EMPTY, we have hit white space. + Dump out the remaining deferred \-newlines. */ + if (speccase[ip[0]] == SPECCASE_EMPTY) + while (deferred_newlines) + deferred_newlines--, *op++ = '\r'; + span = 0; + } + + /* Copy as much as we can without special treatment. */ + while (speccase[ip[span]] == SPECCASE_EMPTY) span++; + memcpy (op, ip, span); + op += span; + ip += span; + + switch (speccase[*ip++]) + { + case SPECCASE_NUL: /* \0 */ + ibase[-1] = op[-1]; + goto read_next; + + case SPECCASE_CR: /* \r */ + if (ip[-2] == '\n') + continue; + else if (*ip == '\n') + ip++; + else if (*ip == '\0') + { + *--ibase = '\r'; + goto read_next; + } + *op++ = '\n'; + break; + + case SPECCASE_BACKSLASH: /* \ */ + backslash: + { + /* If we're at the end of the intermediate buffer, + we have to shift the backslash down to the start + and come back next pass. */ + if (*ip == '\0') + { + *--ibase = '\\'; + goto read_next; + } + else if (*ip == '\n') + { + ip++; + if (*ip == '\r') ip++; + if (*ip == '\n' || *ip == '\t' || *ip == ' ') + *op++ = '\r'; + else if (op[-1] == '\t' || op[-1] == ' ' + || op[-1] == '\r' || op[-1] == '\n') + *op++ = '\r'; + else + deferred_newlines++; + } + else if (*ip == '\r') + { + ip++; + if (*ip == '\n') ip++; + else if (*ip == '\0') + { + *--ibase = '\r'; + *--ibase = '\\'; + goto read_next; + } + else if (*ip == '\r' || *ip == '\t' || *ip == ' ') + *op++ = '\r'; + else + deferred_newlines++; + } + else + *op++ = '\\'; + } + break; + + case SPECCASE_QUESTION: /* ? */ + { + unsigned int d, t; + /* If we're at the end of the intermediate buffer, + we have to shift the ?'s down to the start and + come back next pass. */ + d = ip[0]; + if (d == '\0') + { + *--ibase = '?'; + goto read_next; + } + if (d != '?') + { + *op++ = '?'; + break; + } + d = ip[1]; + if (d == '\0') + { + *--ibase = '?'; + *--ibase = '?'; + goto read_next; + } + + /* Trigraph map: + * from to from to from to + * ?? = # ?? ) ] ?? ! | + * ?? ( [ ?? ' ^ ?? > } + * ?? / \ ?? < { ?? - ~ + */ + if (d == '=') t = '#'; + else if (d == ')') t = ']'; + else if (d == '!') t = '|'; + else if (d == '(') t = '['; + else if (d == '\'') t = '^'; + else if (d == '>') t = '}'; + else if (d == '/') t = '\\'; + else if (d == '<') t = '{'; + else if (d == '-') t = '~'; + else + { + *op++ = '?'; + break; + } + ip += 2; + if (CPP_OPTIONS (pfile)->warn_trigraphs) + { + unsigned long col; + line_base = find_position (line_base, op, &line); + col = op - line_base + 1; + if (CPP_OPTIONS (pfile)->trigraphs) + cpp_warning_with_line (pfile, line, col, + "trigraph ??%c converted to %c", d, t); + else + cpp_warning_with_line (pfile, line, col, + "trigraph ??%c ignored", d); + } + if (CPP_OPTIONS (pfile)->trigraphs) + { + if (t == '\\') + goto backslash; + else + *op++ = t; + } + else + { + *op++ = '?'; + *op++ = '?'; + *op++ = d; + } + } + } + } + } + + if (offset == 0) + return 0; + + /* Deal with pushed-back chars at true EOF. + This may be any of: ?? ? \ \r \n \\r \\n. + \r must become \n, \\r or \\n must become \r. + We know we have space already. */ + if (ibase == pfile->input_buffer) + { + if (*ibase == '?') + { + *op++ = '?'; + *op++ = '?'; + } + else + *op++ = '\r'; + } + else if (ibase == pfile->input_buffer + 1) + { + if (*ibase == '\r') + *op++ = '\n'; + else + *op++ = *ibase; + } + + if (op[-1] != '\n') + { + unsigned long col; + line_base = find_position (line_base, op, &line); + col = op - line_base + 1; + cpp_warning_with_line (pfile, line, col, "no newline at end of file\n"); + if (offset + 1 > len) + { + len += 1; + if (offset + 1 > len) + goto too_big; + buf = (U_CHAR *) xrealloc (buf, len); + op = buf + offset; + } + *op++ = '\n'; + } + + fp->buf = ((len - offset < 20) ? buf : (U_CHAR *)xrealloc (buf, op - buf)); + return op - buf; + + too_big: + cpp_error (pfile, "file is too large (>%lu bytes)\n", (unsigned long)offset); + free (buf); + return -1; + + error: + cpp_error_from_errno (pfile, fp->ihash->name); + free (buf); + return -1; +} + +/* Initialize the `input_buffer' and `input_speccase' tables. + These are only used by read_and_prescan, but they're large and + somewhat expensive to set up, so we want them allocated once for + the duration of the cpp run. */ + +void +_cpp_init_input_buffer (pfile) + cpp_reader *pfile; +{ + U_CHAR *tmp; + + /* Table of characters that cannot be handled by the + read_and_prescan inner loop. The number of non-EMPTY entries + should be as small as humanly possible. */ + + tmp = (U_CHAR *) xmalloc (1 << CHAR_BIT); + memset (tmp, SPECCASE_EMPTY, 1 << CHAR_BIT); + tmp['\0'] = SPECCASE_NUL; + tmp['\r'] = SPECCASE_CR; + tmp['\\'] = SPECCASE_BACKSLASH; + if (CPP_OPTIONS (pfile)->trigraphs || CPP_OPTIONS (pfile)->warn_trigraphs) + tmp['?'] = SPECCASE_QUESTION; + + pfile->input_speccase = tmp; + + /* Determine the appropriate size for the input buffer. Normal C + source files are smaller than eight K. */ + /* 8Kbytes of buffer proper, 2 to detect running off the end without + address arithmetic all the time, and 2 for pushback in the case + there's a potential trigraph or end-of-line digraph at the end of + a block. */ + + tmp = (U_CHAR *) xmalloc (8192 + 2 + 2); + pfile->input_buffer = tmp; + pfile->input_buffer_len = 8192; +} |