diff options
Diffstat (limited to 'libcpp')
-rw-r--r-- | libcpp/ChangeLog | 12 | ||||
-rw-r--r-- | libcpp/Makefile.in | 4 | ||||
-rw-r--r-- | libcpp/directives-only.c | 240 | ||||
-rw-r--r-- | libcpp/include/cpplib.h | 12 | ||||
-rw-r--r-- | libcpp/internal.h | 11 | ||||
-rw-r--r-- | libcpp/lex.c | 482 |
6 files changed, 508 insertions, 253 deletions
diff --git a/libcpp/ChangeLog b/libcpp/ChangeLog index 307cf3a..a1b78bb 100644 --- a/libcpp/ChangeLog +++ b/libcpp/ChangeLog @@ -1,3 +1,15 @@ +2020-05-08 Nathan Sidwell <nathan@acm.org> + + Reimplement directives only processing, support raw literals. + * directives-only.c: Delete. + * Makefile.in (libcpp_a_OBJS, libcpp_a_SOURCES): Remove it. + * include/cpplib.h (enum CPP_DO_task): New enum. + (cpp_directive_only_process): Declare. + * internal.h (_cpp_dir_only_callbacks): Delete. + (_cpp_preprocess_dir_only): Delete. + * lex.c (do_peek_backslash, do_peek_next, do_peek_prev): New. + (cpp_directive_only_process): New implementation. + 2020-02-14 Jakub Jelinek <jakub@redhat.com> Partially implement P1042R1: __VA_OPT__ wording clarifications diff --git a/libcpp/Makefile.in b/libcpp/Makefile.in index 8f8c8f6..3d9ca0b 100644 --- a/libcpp/Makefile.in +++ b/libcpp/Makefile.in @@ -83,11 +83,11 @@ COMPILER_FLAGS = $(ALL_CXXFLAGS) DEPMODE = $(CXXDEPMODE) -libcpp_a_OBJS = charset.o directives.o directives-only.o errors.o \ +libcpp_a_OBJS = charset.o directives.o errors.o \ expr.o files.o identifiers.o init.o lex.o line-map.o macro.o \ mkdeps.o pch.o symtab.o traditional.o -libcpp_a_SOURCES = charset.c directives.c directives-only.c errors.c \ +libcpp_a_SOURCES = charset.c directives.c errors.c \ expr.c files.c identifiers.c init.c lex.c line-map.c macro.c \ mkdeps.c pch.c symtab.c traditional.c diff --git a/libcpp/directives-only.c b/libcpp/directives-only.c deleted file mode 100644 index 5eac118..0000000 --- a/libcpp/directives-only.c +++ /dev/null @@ -1,240 +0,0 @@ -/* CPP Library - directive only preprocessing for distributed compilation. - Copyright (C) 2007-2020 Free Software Foundation, Inc. - Contributed by Ollie Wild <aaw@google.com>. - -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 3, or (at your option) any -later version. 
- -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; see the file COPYING3. If not see -<http://www.gnu.org/licenses/>. */ - -#include "config.h" -#include "system.h" -#include "cpplib.h" -#include "internal.h" - -/* DO (Directive only) flags. */ -#define DO_BOL (1 << 0) /* At the beginning of a logical line. */ -#define DO_STRING (1 << 1) /* In a string constant. */ -#define DO_CHAR (1 << 2) /* In a character constant. */ -#define DO_BLOCK_COMMENT (1 << 3) /* In a block comment. */ -#define DO_LINE_COMMENT (1 << 4) /* In a single line "//-style" comment. */ - -#define DO_LINE_SPECIAL (DO_STRING | DO_CHAR | DO_LINE_COMMENT) -#define DO_SPECIAL (DO_LINE_SPECIAL | DO_BLOCK_COMMENT) - -/* Writes out the preprocessed file, handling spacing and paste - avoidance issues. */ -void -_cpp_preprocess_dir_only (cpp_reader *pfile, - const struct _cpp_dir_only_callbacks *cb) -{ - struct cpp_buffer *buffer; - const unsigned char *cur, *base, *next_line, *rlimit; - cppchar_t c, last_c; - unsigned flags; - linenum_type lines; - int col; - location_t loc; - - restart: - /* Buffer initialization ala _cpp_clean_line(). */ - buffer = pfile->buffer; - buffer->cur_note = buffer->notes_used = 0; - buffer->cur = buffer->line_base = buffer->next_line; - buffer->need_line = false; - - /* This isn't really needed. It prevents a compiler warning, though. */ - loc = pfile->line_table->highest_line; - - /* Scan initialization. */ - next_line = cur = base = buffer->cur; - rlimit = buffer->rlimit; - flags = DO_BOL; - lines = 0; - col = 1; - - for (last_c = '\n', c = *cur; cur < rlimit; last_c = c, c = *++cur, ++col) - { - /* Skip over escaped newlines. 
*/ - if (__builtin_expect (c == '\\', false)) - { - const unsigned char *tmp = cur + 1; - - while (is_nvspace (*tmp) && tmp < rlimit) - tmp++; - if (*tmp == '\r') - tmp++; - if (*tmp == '\n' && tmp < rlimit) - { - CPP_INCREMENT_LINE (pfile, 0); - lines++; - col = 0; - cur = tmp; - c = last_c; - continue; - } - } - - if (__builtin_expect (last_c == '#', false) && !(flags & DO_SPECIAL)) - { - if (c != '#' && (flags & DO_BOL)) - { - class line_maps *line_table; - - if (!pfile->state.skipping && next_line != base) - cb->print_lines (lines, base, next_line - base); - - /* Prep things for directive handling. */ - buffer->next_line = cur; - buffer->need_line = true; - _cpp_get_fresh_line (pfile); - - /* Ensure proper column numbering for generated error messages. */ - buffer->line_base -= col - 1; - - _cpp_handle_directive (pfile, false /* ignore indented */); - - /* Sanitize the line settings. Duplicate #include's can mess - things up. */ - line_table = pfile->line_table; - line_table->highest_location = line_table->highest_line; - - /* The if block prevents us from outputing line information when - the file ends with a directive and no newline. Note that we - must use pfile->buffer, not buffer. */ - if (pfile->buffer->next_line < pfile->buffer->rlimit) - cb->maybe_print_line (pfile->line_table->highest_line); - - goto restart; - } - - flags &= ~DO_BOL; - pfile->mi_valid = false; - } - else if (__builtin_expect (last_c == '/', false) \ - && !(flags & DO_SPECIAL) && c != '*' && c != '/') - { - /* If a previous slash is not starting a block comment, clear the - DO_BOL flag. */ - flags &= ~DO_BOL; - pfile->mi_valid = false; - } - - switch (c) - { - case '/': - if ((flags & DO_BLOCK_COMMENT) && last_c == '*') - { - flags &= ~DO_BLOCK_COMMENT; - c = 0; - } - else if (!(flags & DO_SPECIAL) && last_c == '/') - flags |= DO_LINE_COMMENT; - else if (!(flags & DO_SPECIAL)) - /* Mark the position for possible error reporting. 
*/ - loc = linemap_position_for_column (pfile->line_table, col); - - break; - - case '*': - if (!(flags & DO_SPECIAL)) - { - if (last_c == '/') - flags |= DO_BLOCK_COMMENT; - else - { - flags &= ~DO_BOL; - pfile->mi_valid = false; - } - } - - break; - - case '\'': - case '"': - { - unsigned state = (c == '"') ? DO_STRING : DO_CHAR; - - if (!(flags & DO_SPECIAL)) - { - flags |= state; - flags &= ~DO_BOL; - pfile->mi_valid = false; - } - else if ((flags & state) && last_c != '\\') - flags &= ~state; - - break; - } - - case '\\': - { - if ((flags & (DO_STRING | DO_CHAR)) && last_c == '\\') - c = 0; - - if (!(flags & DO_SPECIAL)) - { - flags &= ~DO_BOL; - pfile->mi_valid = false; - } - - break; - } - - case '\n': - CPP_INCREMENT_LINE (pfile, 0); - lines++; - col = 0; - flags &= ~DO_LINE_SPECIAL; - if (!(flags & DO_SPECIAL)) - flags |= DO_BOL; - break; - - case '#': - next_line = cur; - /* Don't update DO_BOL yet. */ - break; - - case ' ': case '\t': case '\f': case '\v': case '\0': - break; - - default: - if (!(flags & DO_SPECIAL)) - { - flags &= ~DO_BOL; - pfile->mi_valid = false; - } - break; - } - } - - if (flags & DO_BLOCK_COMMENT) - cpp_error_with_line (pfile, CPP_DL_ERROR, loc, 0, "unterminated comment"); - - if (!pfile->state.skipping && cur != base) - { - /* If the file was not newline terminated, add rlimit, which is - guaranteed to point to a newline, to the end of our range. 
*/ - if (cur[-1] != '\n') - { - cur++; - CPP_INCREMENT_LINE (pfile, 0); - lines++; - } - - cb->print_lines (lines, base, cur - base); - } - - _cpp_pop_buffer (pfile); - if (pfile->buffer) - goto restart; -} diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h index 03cc72a..2b1e33f 100644 --- a/libcpp/include/cpplib.h +++ b/libcpp/include/cpplib.h @@ -1304,6 +1304,18 @@ extern int cpp_read_state (cpp_reader *, const char *, FILE *, /* In lex.c */ extern void cpp_force_token_locations (cpp_reader *, location_t); extern void cpp_stop_forcing_token_locations (cpp_reader *); +enum CPP_DO_task +{ + CPP_DO_print, + CPP_DO_location, + CPP_DO_token +}; + +extern void cpp_directive_only_process (cpp_reader *pfile, + void *data, + void (*cb) (cpp_reader *, + CPP_DO_task, + void *data, ...)); /* In expr.c */ extern enum cpp_ttype cpp_userdef_string_remove_type diff --git a/libcpp/internal.h b/libcpp/internal.h index 97d9bdb..11b6469 100644 --- a/libcpp/internal.h +++ b/libcpp/internal.h @@ -747,17 +747,6 @@ extern void _cpp_do_file_change (cpp_reader *, enum lc_reason, const char *, extern void _cpp_pop_buffer (cpp_reader *); extern char *_cpp_bracket_include (cpp_reader *); -/* In directives.c */ -struct _cpp_dir_only_callbacks -{ - /* Called to print a block of lines. */ - void (*print_lines) (int, const void *, size_t); - bool (*maybe_print_line) (location_t); -}; - -extern void _cpp_preprocess_dir_only (cpp_reader *, - const struct _cpp_dir_only_callbacks *); - /* In traditional.c. */ extern bool _cpp_scan_out_logical_line (cpp_reader *, cpp_macro *, bool); extern bool _cpp_read_logical_line_trad (cpp_reader *); diff --git a/libcpp/lex.c b/libcpp/lex.c index 56ac3a1..3bcf073 100644 --- a/libcpp/lex.c +++ b/libcpp/lex.c @@ -3826,3 +3826,485 @@ cpp_stop_forcing_token_locations (cpp_reader *r) { r->forced_token_location = 0; } + +/* We're looking at \, if it's escaping EOL, look past it. If at + LIMIT, don't advance. 
*/ + +static const unsigned char * +do_peek_backslash (const unsigned char *peek, const unsigned char *limit) +{ + const unsigned char *probe = peek; + + if (__builtin_expect (peek[1] == '\n', true)) + { + eol: + probe += 2; + if (__builtin_expect (probe < limit, true)) + { + peek = probe; + if (*peek == '\\') + /* The user might be perverse. */ + return do_peek_backslash (peek, limit); + } + } + else if (__builtin_expect (peek[1] == '\r', false)) + { + if (probe[2] == '\n') + probe++; + goto eol; + } + + return peek; +} + +static const unsigned char * +do_peek_next (const unsigned char *peek, const unsigned char *limit) +{ + if (__builtin_expect (*peek == '\\', false)) + peek = do_peek_backslash (peek, limit); + return peek; +} + +static const unsigned char * +do_peek_prev (const unsigned char *peek, const unsigned char *bound) +{ + if (peek == bound) + return NULL; + + unsigned char c = *--peek; + if (__builtin_expect (c == '\n', false) + || __builtin_expect (c == 'r', false)) + { + if (peek == bound) + return peek; + int ix = -1; + if (c == '\n' && peek[ix] == '\r') + { + if (peek + ix == bound) + return peek; + ix--; + } + + if (peek[ix] == '\\') + return do_peek_prev (peek + ix, bound); + + return peek; + } + else + return peek; +} + +/* Directives-only scanning. Somewhat more relaxed than correct + parsing -- some ill-formed programs will not be rejected. */ + +void +cpp_directive_only_process (cpp_reader *pfile, + void *data, + void (*cb) (cpp_reader *, CPP_DO_task, void *, ...)) +{ + do + { + restart: + /* Buffer initialization, but no line cleaning. */ + cpp_buffer *buffer = pfile->buffer; + buffer->cur_note = buffer->notes_used = 0; + buffer->cur = buffer->line_base = buffer->next_line; + buffer->need_line = false; + /* Files always end in a newline. We rely on this for + character peeking safety. 
*/ + gcc_assert (buffer->rlimit[-1] == '\n'); + + const unsigned char *base = buffer->cur; + unsigned line_count = 0; + const unsigned char *line_start = base; + + bool bol = true; + bool raw = false; + + const unsigned char *lwm = base; + for (const unsigned char *pos = base, *limit = buffer->rlimit; + pos < limit;) + { + unsigned char c = *pos++; + /* This matches the switch in _cpp_lex_direct. */ + switch (c) + { + case ' ': case '\t': case '\f': case '\v': + /* Whitespace, do nothing. */ + break; + + case '\r': /* MAC line ending, or Windows \r\n */ + if (*pos == '\n') + pos++; + /* FALLTHROUGH */ + + case '\n': + bol = true; + + next_line: + CPP_INCREMENT_LINE (pfile, 0); + line_count++; + line_start = pos; + break; + + case '\\': + /* <backslash><newline> is removed, and doesn't undo any + preceeding escape or whatnot. */ + if (*pos == '\n') + { + pos++; + goto next_line; + } + else if (*pos == '\r') + { + if (pos[1] == '\n') + pos++; + pos++; + goto next_line; + } + goto dflt; + + case '#': + if (bol) + { + /* Line directive. */ + if (pos - 1 > base && !pfile->state.skipping) + cb (pfile, CPP_DO_print, data, + line_count, base, pos - 1 - base); + + /* Prep things for directive handling. */ + buffer->next_line = pos; + buffer->need_line = true; + _cpp_get_fresh_line (pfile); + + /* Ensure proper column numbering for generated + error messages. */ + buffer->line_base -= pos - line_start; + + _cpp_handle_directive (pfile, line_start + 1 != pos); + + /* Sanitize the line settings. Duplicate #include's can + mess things up. */ + // FIXME: Necessary? 
+ pfile->line_table->highest_location + = pfile->line_table->highest_line; + + if (!pfile->state.skipping + && pfile->buffer->next_line < pfile->buffer->rlimit) + cb (pfile, CPP_DO_location, data, + pfile->line_table->highest_line); + + goto restart; + } + goto dflt; + + case '/': + { + const unsigned char *peek = do_peek_next (pos, limit); + if (!(*peek == '/' || *peek == '*')) + goto dflt; + + /* Line or block comment */ + bool is_block = *peek == '*'; + bool star = false; + bool esc = false; + location_t sloc + = linemap_position_for_column (pfile->line_table, + pos - line_start); + + while (pos < limit) + { + char c = *pos++; + switch (c) + { + case '\\': + esc = true; + break; + + case '\r': + if (*pos == '\n') + pos++; + /* FALLTHROUGH */ + + case '\n': + { + CPP_INCREMENT_LINE (pfile, 0); + line_count++; + line_start = pos; + if (!esc && !is_block) + { + bol = true; + goto done_comment; + } + } + if (!esc) + star = false; + esc = false; + break; + + case '*': + if (pos > peek && !esc) + star = is_block; + esc = false; + break; + + case '/': + if (star) + goto done_comment; + /* FALLTHROUGH */ + + default: + star = false; + esc = false; + break; + } + } + cpp_error_with_line (pfile, CPP_DL_ERROR, sloc, 0, + "unterminated comment"); + done_comment: + lwm = pos; + break; + } + + case '\'': + if (!CPP_OPTION (pfile, digit_separators)) + goto delimited_string; + + /* Possibly a number punctuator. */ + if (!ISIDNUM (*do_peek_next (pos, limit))) + goto delimited_string; + + goto quote_peek; + + case '\"': + if (!CPP_OPTION (pfile, rliterals)) + goto delimited_string; + + quote_peek: + { + /* For ' see if it's a number punctuator + \.?<digit>(<digit>|<identifier-nondigit> + |'<digit>|'<nondigit>|[eEpP]<sign>|\.)* */ + /* For " see if it's a raw string + {U,L,u,u8}R. This includes CPP_NUMBER detection, + because that could be 0e+R. 
*/ + const unsigned char *peek = pos - 1; + bool quote_first = c == '"'; + bool quote_eight = false; + bool maybe_number_start = false; + bool want_number = false; + + while ((peek = do_peek_prev (peek, lwm))) + { + unsigned char p = *peek; + if (quote_first) + { + if (!raw) + { + if (p != 'R') + break; + raw = true; + continue; + } + + quote_first = false; + if (p == 'L' || p == 'U' || p == 'u') + ; + else if (p == '8') + quote_eight = true; + else + goto second_raw; + } + else if (quote_eight) + { + if (p != 'u') + { + raw = false; + break; + } + quote_eight = false; + } + else if (c == '"') + { + second_raw:; + if (!want_number && ISIDNUM (p)) + { + raw = false; + break; + } + } + + if (ISDIGIT (p)) + maybe_number_start = true; + else if (p == '.') + want_number = true; + else if (ISIDNUM (p)) + maybe_number_start = false; + else if (p == '+' || p == '-') + { + if (const unsigned char *peek_prev + = do_peek_prev (peek, lwm)) + { + p = *peek_prev; + if (p == 'e' || p == 'E' + || p == 'p' || p == 'P') + { + want_number = true; + maybe_number_start = false; + } + else + break; + } + else + break; + } + else if (p == '\'' || p == '\"') + { + /* If this is lwm, this must be the end of a + previous string. So this is a trailing + literal type, (a) if those are allowed, + and (b) maybe_start is false. Otherwise + this must be a CPP_NUMBER because we've + met another ', and we'd have checked that + in its own right. */ + if (peek == lwm && CPP_OPTION (pfile, uliterals)) + { + if (!maybe_number_start && !want_number) + /* Must be a literal type. */ + raw = false; + } + else if (p == '\'' + && CPP_OPTION (pfile, digit_separators)) + maybe_number_start = true; + break; + } + else if (c == '\'') + break; + else if (!quote_first && !quote_eight) + break; + } + + if (maybe_number_start) + { + if (c == '\'') + /* A CPP NUMBER. */ + goto dflt; + raw = false; + } + + goto delimited_string; + } + + delimited_string: + { + /* (Possibly raw) string or char literal. 
*/ + unsigned char end = c; + int delim_len = -1; + const unsigned char *delim = NULL; + location_t sloc = linemap_position_for_column (pfile->line_table, + pos - line_start); + int esc = 0; + + if (raw) + { + /* There can be no line breaks in the delimiter. */ + delim = pos; + for (delim_len = 0; (c = *pos++) != '('; delim_len++) + { + if (delim_len == 16) + { + cpp_error_with_line (pfile, CPP_DL_ERROR, + sloc, 0, + "raw string delimiter" + " longer than %d" + " characters", + delim_len); + raw = false; + pos = delim; + break; + } + if (strchr (") \\\t\v\f\n", c)) + { + cpp_error_with_line (pfile, CPP_DL_ERROR, + sloc, 0, + "invalid character '%c'" + " in raw string" + " delimiter", c); + raw = false; + pos = delim; + break; + } + if (pos >= limit) + goto bad_string; + } + } + + while (pos < limit) + { + char c = *pos++; + switch (c) + { + case '\\': + if (!raw) + esc++; + break; + + case '\r': + if (*pos == '\n') + pos++; + /* FALLTHROUGH */ + + case '\n': + { + CPP_INCREMENT_LINE (pfile, 0); + line_count++; + line_start = pos; + } + if (esc) + esc--; + break; + + case ')': + if (raw + && pos + delim_len + 1 < limit + && pos[delim_len] == end + && !memcmp (delim, pos, delim_len)) + { + pos += delim_len + 1; + raw = false; + goto done_string; + } + break; + + default: + if (!raw && !(esc & 1) && c == end) + goto done_string; + esc = 0; + break; + } + } + bad_string: + cpp_error_with_line (pfile, CPP_DL_ERROR, sloc, 0, + "unterminated literal"); + + done_string: + raw = false; + lwm = pos - 1; + } + goto dflt; + + default: + dflt: + bol = false; + pfile->mi_valid = false; + break; + } + } + + if (buffer->rlimit > base && !pfile->state.skipping) + cb (pfile, CPP_DO_print, data, line_count, base, buffer->rlimit - base); + + _cpp_pop_buffer (pfile); + } + while (pfile->buffer); +} |