diff options
-rw-r--r-- | gcc/testsuite/ChangeLog | 6 | ||||
-rw-r--r-- | gcc/testsuite/c-c++-common/raw-string-14.c | 39 | ||||
-rw-r--r-- | gcc/testsuite/c-c++-common/raw-string-16.c | 14 | ||||
-rw-r--r-- | gcc/testsuite/c-c++-common/raw-string-5.c | 14 | ||||
-rw-r--r-- | libcpp/ChangeLog | 6 | ||||
-rw-r--r-- | libcpp/lex.c | 522 |
6 files changed, 317 insertions, 284 deletions
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 0aa1b3f..4bf8964 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2020-05-19 Nathan Sidwell <nathan@acm.org> + + * c-c++-common/raw-string-14.c: Adjust errors. + * c-c++-common/raw-string-16.c: Likewise. + * c-c++-common/raw-string-5.c: Likewise. + 2020-05-19 Richard Biener <rguenther@suse.de> * gcc.target/i386/pr92645-4.c: Adjust expected pattern. diff --git a/gcc/testsuite/c-c++-common/raw-string-14.c b/gcc/testsuite/c-c++-common/raw-string-14.c index b2e395c..fba826c 100644 --- a/gcc/testsuite/c-c++-common/raw-string-14.c +++ b/gcc/testsuite/c-c++-common/raw-string-14.c @@ -4,36 +4,33 @@ // { dg-options "-std=c++11" { target c++ } } const void *s0 = R"abc\ -def()abcdef"; - // { dg-error "invalid character" "invalid" { target *-*-* } 6 } - // { dg-error "stray" "stray" { target *-*-* } 6 } +def()abcdef" 0; + // { dg-error "invalid character" "invalid" { target *-*-* } .-2 } + // { dg-error "stray" "stray" { target *-*-* } .-3 } const void *s1 = R"??/ -()??/"; - // { dg-error "invalid new-line" "invalid" { target *-*-* } 10 } - // { dg-error "stray" "stray" { target *-*-* } 10 } - // { dg-warning "missing terminating" "missing" { target *-*-* } 10 } - // { dg-error "19:missing terminating" "missing" { target *-*-* } 10 } -const void *s2 = R"abcdefghijklmn??/(a)abcdefghijklmn???"; +()??/" 0; + // { dg-error "invalid new-line" "invalid" { target *-*-* } .-2 } + // { dg-error "stray" "stray" { target *-*-* } .-3 } +const void *s2 = R"abcdefghijklmn??/(a)abcdefghijklmn???" 0; // { dg-error "raw string delimiter longer" "longer" { target *-*-* } .-1 } // { dg-error "stray" "stray" { target *-*-* } .-2 } - // { dg-error "expected" "expected" { target *-*-* } .-3 } -const void *s3 = R"abcdefghijklmno??/(a)abcdefghijklmno???"; +const void *s3 = R"abcdefghijklmno??/(a)abcdefghijklmno???" 
0; // { dg-error "raw string delimiter longer" "longer" { target *-*-* } .-1 } // { dg-error "stray" "stray" { target *-*-* } .-2 } -const void *s4 = R"abcdefghijklmnop??=(a)abcdefghijklmnop??="; +const void *s4 = R"abcdefghijklmnop??=(a)abcdefghijklmnop??=" 0; // { dg-error "raw string delimiter longer" "longer" { target *-*-* } .-1 } // { dg-error "stray" "stray" { target *-*-* } .-2 } const void *s5 = R"abc\ -()abcdef"; - // { dg-error "invalid character" "invalid" { target *-*-* } 26 } - // { dg-error "stray" "stray" { target *-*-* } 26 } +()abcdef" 0; + // { dg-error "invalid character" "invalid" { target *-*-* } .-2 } + // { dg-error "stray" "stray" { target *-*-* } .-3 } const void *s6 = R"\ -()"; - // { dg-error "invalid character" "invalid" { target *-*-* } 30 } - // { dg-error "stray" "stray" { target *-*-* } 30 } +()" 0; + // { dg-error "invalid character" "invalid" { target *-*-* } .-2 } + // { dg-error "stray" "stray" { target *-*-* } .-3 } const void *s7 = R"\ -a()a"; - // { dg-error "invalid character" "invalid" { target *-*-* } 34 } - // { dg-error "stray" "stray" { target *-*-* } 34 } +a()a" 0; + // { dg-error "invalid character" "invalid" { target *-*-* } .-2 } + // { dg-error "stray" "stray" { target *-*-* } .-3 } int main () {} diff --git a/gcc/testsuite/c-c++-common/raw-string-16.c b/gcc/testsuite/c-c++-common/raw-string-16.c index 1185c0e..1bf16dd 100644 --- a/gcc/testsuite/c-c++-common/raw-string-16.c +++ b/gcc/testsuite/c-c++-common/raw-string-16.c @@ -4,29 +4,29 @@ // { dg-options "-std=gnu++11 -Wtrigraphs" { target c++ } } const void *s0 = R"abc\ -def()abcdef"; +def()abcdef" 0; // { dg-error "invalid character" "invalid" { target *-*-* } 6 } // { dg-error "stray" "stray" { target *-*-* } 6 } -const void *s1 = R"abcdefghijklmn??/(a)abcdefghijklmn???"; +const void *s1 = R"abcdefghijklmn??/(a)abcdefghijklmn???" 
0; // { dg-error "raw string delimiter longer" "longer" { target *-*-* } .-1 } // { dg-error "stray" "stray" { target *-*-* } .-2 } -const void *s2 = R"abcdefghijklmno??/(a)abcdefghijklmno???"; +const void *s2 = R"abcdefghijklmno??/(a)abcdefghijklmno???" 0; // { dg-error "raw string delimiter longer" "longer" { target *-*-* } .-1 } // { dg-error "stray" "stray" { target *-*-* } .-2 } -const void *s3 = R"abcdefghijklmnop??=(a)abcdefghijklmnop??=?"; // { dg-warning "trigraph ..= ignored" } +const void *s3 = R"abcdefghijklmnop??=(a)abcdefghijklmnop??=?" 0; // { dg-error "raw string delimiter longer" "longer" { target *-*-* } .-1 } // { dg-error "stray" "stray" { target *-*-* } .-2 } const void *s4 = R"abc\ -()abcdef"; +()abcdef" 0; // { dg-error "invalid character" "invalid" { target *-*-* } 20 } // { dg-error "stray" "stray" { target *-*-* } 20 } const void *s5 = R"\ -()"; +()" 0; // { dg-error "invalid character" "invalid" { target *-*-* } 24 } // { dg-error "stray" "stray" { target *-*-* } 24 } const void *s6 = R"\ -a()a"; +a()a" 0; // { dg-error "invalid character" "invalid" { target *-*-* } 28 } // { dg-error "stray" "stray" { target *-*-* } 28 } diff --git a/gcc/testsuite/c-c++-common/raw-string-5.c b/gcc/testsuite/c-c++-common/raw-string-5.c index 78602dc..dbf3133 100644 --- a/gcc/testsuite/c-c++-common/raw-string-5.c +++ b/gcc/testsuite/c-c++-common/raw-string-5.c @@ -2,25 +2,25 @@ // { dg-options "-std=gnu99" { target c } } // { dg-options "-std=c++0x" { target c++ } } -const void *s0 = R"0123456789abcdefg()0123456789abcdefg"; +const void *s0 = R"0123456789abcdefg()0123456789abcdefg" 0; // { dg-error "raw string delimiter longer" "longer" { target *-*-* } .-1 } // { dg-error "stray" "stray" { target *-*-* } .-2 } -const void *s1 = R" () "; +const void *s1 = R" () " 0; // { dg-error "invalid character" "invalid" { target *-*-* } .-1 } // { dg-error "stray" "stray" { target *-*-* } .-2 } -const void *s2 = R" () "; +const void *s2 = R" () " 0; // { dg-error 
"invalid character" "invalid" { target *-*-* } .-1 } // { dg-error "stray" "stray" { target *-*-* } .-2 } -const void *s3 = R")())"; +const void *s3 = R")())" 0; // { dg-error "invalid character" "invalid" { target *-*-* } .-1 } // { dg-error "stray" "stray" { target *-*-* } .-2 } -const void *s4 = R"@()@"; +const void *s4 = R"@()@" 0; // { dg-error "invalid character" "invalid" { target *-*-* } .-1 } // { dg-error "stray" "stray" { target *-*-* } .-2 } -const void *s5 = R"$()$"; +const void *s5 = R"$()$" 0; // { dg-error "invalid character" "invalid" { target *-*-* } .-1 } // { dg-error "stray" "stray" { target *-*-* } .-2 } -const void *s6 = R"\u0040()\u0040"; +const void *s6 = R"\u0040()\u0040" 0; // { dg-error "invalid character" "invalid" { target *-*-* } .-1 } // { dg-error "stray" "stray" { target *-*-* } .-2 } diff --git a/libcpp/ChangeLog b/libcpp/ChangeLog index 6c3cf09..5bfa184 100644 --- a/libcpp/ChangeLog +++ b/libcpp/ChangeLog @@ -1,5 +1,11 @@ 2020-05-18 Nathan Sidwell <nathan@acm.org> + PR preprocessor/95149 + * lex.c (struct lit_accum): New. + (bufring_append): Replace by lit_accum::append. + (lex_raw_string): Reimplement, using fragments of the old version. + (lex_string): Adjust lex_raw_string call. + PR preprocessor/95182 * macro.c (collect_args): Preserve EOFif we fell out of the main file. diff --git a/libcpp/lex.c b/libcpp/lex.c index f0ee0f9..5d94882 100644 --- a/libcpp/lex.c +++ b/libcpp/lex.c @@ -1586,35 +1586,74 @@ create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base, token->val.str.text = dest; } +/* A pair of raw buffer pointers. The currently open one is [1], the + first one is [0]. Used for string literal lexing. 
*/ +struct lit_accum { + _cpp_buff *first; + _cpp_buff *last; + const uchar *rpos; + size_t accum; + + lit_accum () + : first (NULL), last (NULL), rpos (0), accum (0) + { + } + + void append (cpp_reader *, const uchar *, size_t); + + void read_begin (cpp_reader *); + bool reading_p () const + { + return rpos != NULL; + } + char read_char () + { + char c = *rpos++; + if (rpos == BUFF_FRONT (last)) + rpos = NULL; + return c; + } +}; + /* Subroutine of lex_raw_string: Append LEN chars from BASE to the buffer sequence from *FIRST_BUFF_P to LAST_BUFF_P. */ -static void -bufring_append (cpp_reader *pfile, const uchar *base, size_t len, - _cpp_buff **first_buff_p, _cpp_buff **last_buff_p) +void +lit_accum::append (cpp_reader *pfile, const uchar *base, size_t len) { - _cpp_buff *first_buff = *first_buff_p; - _cpp_buff *last_buff = *last_buff_p; - - if (first_buff == NULL) - first_buff = last_buff = _cpp_get_buff (pfile, len); - else if (len > BUFF_ROOM (last_buff)) + if (!last) + /* Starting. */ + first = last = _cpp_get_buff (pfile, len); + else if (len > BUFF_ROOM (last)) { - size_t room = BUFF_ROOM (last_buff); - memcpy (BUFF_FRONT (last_buff), base, room); - BUFF_FRONT (last_buff) += room; + /* There is insufficient room in the buffer. Copy what we can, + and then either extend or create a new one. */ + size_t room = BUFF_ROOM (last); + memcpy (BUFF_FRONT (last), base, room); + BUFF_FRONT (last) += room; base += room; len -= room; - last_buff = _cpp_append_extend_buff (pfile, last_buff, len); - } + accum += room; + + gcc_checking_assert (!rpos); - memcpy (BUFF_FRONT (last_buff), base, len); - BUFF_FRONT (last_buff) += len; + last = _cpp_append_extend_buff (pfile, last, len); + } - *first_buff_p = first_buff; - *last_buff_p = last_buff; + memcpy (BUFF_FRONT (last), base, len); + BUFF_FRONT (last) += len; + accum += len; } +void +lit_accum::read_begin (cpp_reader *pfile) +{ + /* We never accumulate more than 4 chars to read. 
*/ + if (BUFF_ROOM (last) < 4) + + last = _cpp_append_extend_buff (pfile, last, 4); + rpos = BUFF_FRONT (last); +} /* Returns true if a macro has been defined. This might not work if compile with -save-temps, @@ -1657,247 +1696,231 @@ is_macro_not_literal_suffix(cpp_reader *pfile, const uchar *base) return is_macro (pfile, base); } -/* Lexes a raw string. The stored string contains the spelling, including - double quotes, delimiter string, '(' and ')', any leading - 'L', 'u', 'U' or 'u8' and 'R' modifier. It returns the type of the - literal, or CPP_OTHER if it was not properly terminated. +/* Lexes a raw string. The stored string contains the spelling, + including double quotes, delimiter string, '(' and ')', any leading + 'L', 'u', 'U' or 'u8' and 'R' modifier. The created token contains + the type of the literal, or CPP_OTHER if it was not properly + terminated. + + BASE is the start of the token. Updates pfile->buffer->cur to just + after the lexed string. The spelling is NUL-terminated, but it is not guaranteed that this is the first NUL since embedded NULs are preserved. */ static void -lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base, - const uchar *cur) +lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base) { - uchar raw_prefix[17]; - uchar temp_buffer[18]; - const uchar *orig_base; - unsigned int raw_prefix_len = 0, raw_suffix_len = 0; - enum raw_str_phase { RAW_STR_PREFIX, RAW_STR, RAW_STR_SUFFIX }; - raw_str_phase phase = RAW_STR_PREFIX; - enum cpp_ttype type; - size_t total_len = 0; - /* Index into temp_buffer during phases other than RAW_STR, - during RAW_STR phase 17 to tell BUF_APPEND that nothing should - be appended to temp_buffer. */ - size_t temp_buffer_len = 0; - _cpp_buff *first_buff = NULL, *last_buff = NULL; - size_t raw_prefix_start; + const uchar *pos = base; + + /* 'tis a pity this information isn't passed down from the lexer's + initial categorization of the token. 
*/ + enum cpp_ttype type = CPP_STRING; + + if (*pos == 'L') + { + type = CPP_WSTRING; + pos++; + } + else if (*pos == 'U') + { + type = CPP_STRING32; + pos++; + } + else if (*pos == 'u') + { + if (pos[1] == '8') + { + type = CPP_UTF8STRING; + pos++; + } + else + type = CPP_STRING16; + pos++; + } + + gcc_checking_assert (pos[0] == 'R' && pos[1] == '"'); + pos += 2; + _cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note]; - type = (*base == 'L' ? CPP_WSTRING : - *base == 'U' ? CPP_STRING32 : - *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16) - : CPP_STRING); - -#define BUF_APPEND(STR,LEN) \ - do { \ - bufring_append (pfile, (const uchar *)(STR), (LEN), \ - &first_buff, &last_buff); \ - total_len += (LEN); \ - if (__builtin_expect (temp_buffer_len < 17, 0) \ - && (const uchar *)(STR) != base \ - && (LEN) <= 2) \ - { \ - memcpy (temp_buffer + temp_buffer_len, \ - (const uchar *)(STR), (LEN)); \ - temp_buffer_len += (LEN); \ - } \ - } while (0) - - orig_base = base; - ++cur; - raw_prefix_start = cur - base; + /* Skip notes before the ". */ + while (note->pos < pos) + ++note; + + lit_accum accum; + + uchar prefix[17]; + unsigned prefix_len = 0; + enum Phase + { + PHASE_PREFIX = -2, + PHASE_NONE = -1, + PHASE_SUFFIX = 0 + } phase = PHASE_PREFIX; + for (;;) { - cppchar_t c; - - /* If we previously performed any trigraph or line splicing - transformations, undo them in between the opening and closing - double quote. */ - while (note->pos < cur) - ++note; - for (; note->pos == cur; ++note) - { - switch (note->type) - { - case '\\': - case ' ': - /* Restore backslash followed by newline. */ - BUF_APPEND (base, cur - base); - base = cur; - BUF_APPEND ("\\", 1); - after_backslash: - if (note->type == ' ') - { - /* GNU backslash whitespace newline extension. FIXME - could be any sequence of non-vertical space. When we - can properly restore any such sequence, we should mark - this note as handled so _cpp_process_line_notes - doesn't warn. 
*/ - BUF_APPEND (" ", 1); - } + gcc_checking_assert (note->pos >= pos); - BUF_APPEND ("\n", 1); - break; + /* Undo any escaped newlines and trigraphs. */ + if (!accum.reading_p () && note->pos == pos) + switch (note->type) + { + case '\\': + case ' ': + /* Restore backslash followed by newline. */ + accum.append (pfile, base, pos - base); + base = pos; + accum.read_begin (pfile); + accum.append (pfile, UC"\\", 1); + + after_backslash: + if (note->type == ' ') + /* GNU backslash whitespace newline extension. FIXME + could be any sequence of non-vertical space. When we + can properly restore any such sequence, we should + mark this note as handled so _cpp_process_line_notes + doesn't warn. */ + accum.append (pfile, UC" ", 1); + + accum.append (pfile, UC"\n", 1); + note++; + break; - case 0: - /* Already handled. */ - break; + case '\n': + /* This can happen for ??/<NEWLINE> when trigraphs are not + being interpreted. */ + gcc_checking_assert (!CPP_OPTION (pfile, trigraphs)); + note->type = 0; + note++; + break; - default: - if (_cpp_trigraph_map[note->type]) - { - /* Don't warn about this trigraph in - _cpp_process_line_notes, since trigraphs show up as - trigraphs in raw strings. */ - uchar type = note->type; - note->type = 0; - - if (!CPP_OPTION (pfile, trigraphs)) - /* If we didn't convert the trigraph in the first - place, don't do anything now either. */ - break; + default: + gcc_checking_assert (_cpp_trigraph_map[note->type]); - BUF_APPEND (base, cur - base); - base = cur; - BUF_APPEND ("??", 2); + /* Don't warn about this trigraph in + _cpp_process_line_notes, since trigraphs show up as + trigraphs in raw strings. */ + uchar type = note->type; + note->type = 0; - /* ??/ followed by newline gets two line notes, one for - the trigraph and one for the backslash/newline. 
*/ - if (type == '/' && note[1].pos == cur) - { - if (note[1].type != '\\' - && note[1].type != ' ') - abort (); - BUF_APPEND ("/", 1); - ++note; - goto after_backslash; - } - else - { - /* Skip the replacement character. */ - base = ++cur; - BUF_APPEND (&type, 1); - c = type; - goto check_c; - } - } - else - abort (); - break; - } - } - c = *cur++; - if (__builtin_expect (temp_buffer_len < 17, 0)) - temp_buffer[temp_buffer_len++] = c; + if (CPP_OPTION (pfile, trigraphs)) + { + accum.append (pfile, base, pos - base); + base = pos; + accum.read_begin (pfile); + accum.append (pfile, UC"??", 2); + accum.append (pfile, &type, 1); + + /* ??/ followed by newline gets two line notes, one for + the trigraph and one for the backslash/newline. */ + if (type == '/' && note[1].pos == pos) + { + note++; + gcc_assert (note->type == '\\' || note->type == ' '); + goto after_backslash; + } + /* Skip the replacement character. */ + base = ++pos; + } - check_c: - if (phase == RAW_STR_PREFIX) + note++; + break; + } + + /* Now get a char to process. Either from an expanded note, or + from the line buffer. */ + bool read_note = accum.reading_p (); + char c = read_note ? accum.read_char () : *pos++; + + if (phase == PHASE_PREFIX) { - while (raw_prefix_len < temp_buffer_len) + if (c == '(') { - raw_prefix[raw_prefix_len] = temp_buffer[raw_prefix_len]; - switch (raw_prefix[raw_prefix_len]) - { - case ' ': case '(': case ')': case '\\': case '\t': - case '\v': case '\f': case '\n': default: - break; - /* Basic source charset except the above chars. 
*/ - case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': - case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': - case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': - case 's': case 't': case 'u': case 'v': case 'w': case 'x': - case 'y': case 'z': - case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': - case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': - case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': - case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': - case 'Y': case 'Z': - case '0': case '1': case '2': case '3': case '4': case '5': - case '6': case '7': case '8': case '9': - case '_': case '{': case '}': case '#': case '[': case ']': - case '<': case '>': case '%': case ':': case ';': case '.': - case '?': case '*': case '+': case '-': case '/': case '^': - case '&': case '|': case '~': case '!': case '=': case ',': - case '"': case '\'': - if (raw_prefix_len < 16) - { - raw_prefix_len++; - continue; - } - break; - } - - if (raw_prefix[raw_prefix_len] != '(') - { - int col = CPP_BUF_COLUMN (pfile->buffer, cur) + 1; - if (raw_prefix_len == 16) - cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, - col, "raw string delimiter longer " - "than 16 characters"); - else if (raw_prefix[raw_prefix_len] == '\n') - cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, - col, "invalid new-line in raw " - "string delimiter"); - else - cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, - col, "invalid character '%c' in " - "raw string delimiter", - (int) raw_prefix[raw_prefix_len]); - pfile->buffer->cur = orig_base + raw_prefix_start - 1; - create_literal (pfile, token, orig_base, - raw_prefix_start - 1, CPP_OTHER); - if (first_buff) - _cpp_release_buff (pfile, first_buff); - return; - } - raw_prefix[raw_prefix_len] = '"'; - phase = RAW_STR; - /* Nothing should be appended to temp_buffer during - RAW_STR phase. */ - temp_buffer_len = 17; - break; + /* Done. 
*/ + phase = PHASE_NONE; + prefix[prefix_len++] = '"'; + } + else if (prefix_len < 16 + /* Prefix chars are any of the basic character set, + [lex.charset] except for ' + ()\\\t\v\f\n'. Optimized for a contiguous + alphabet. */ + /* Unlike a switch, this collapses down to one or + two shift and bitmask operations on an ASCII + system, with an outlier or two. */ + && (('Z' - 'A' == 25 + ? ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) + : ISIDST (c)) + || (c >= '0' && c <= '9') + || c == '_' || c == '{' || c == '}' + || c == '[' || c == ']' || c == '#' + || c == '<' || c == '>' || c == '%' + || c == ':' || c == ';' || c == '.' || c == '?' + || c == '*' || c == '+' || c == '-' || c == '/' + || c == '^' || c == '&' || c == '|' || c == '~' + || c == '!' || c == '=' || c == ',' + || c == '"' || c == '\'')) + prefix[prefix_len++] = c; + else + { + /* Something is wrong. */ + int col = CPP_BUF_COLUMN (pfile->buffer, pos) + read_note; + if (prefix_len == 16) + cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, + col, "raw string delimiter longer " + "than 16 characters"); + else if (c == '\n') + cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, + col, "invalid new-line in raw " + "string delimiter"); + else + cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, + col, "invalid character '%c' in " + "raw string delimiter", c); + type = CPP_OTHER; + phase = PHASE_NONE; + /* Continue until we get a close quote, that's probably + the best failure mode. 
*/ + prefix_len = 0; } continue; } - else if (phase == RAW_STR_SUFFIX) + + if (phase != PHASE_NONE) { - while (raw_suffix_len <= raw_prefix_len - && raw_suffix_len < temp_buffer_len - && temp_buffer[raw_suffix_len] == raw_prefix[raw_suffix_len]) - raw_suffix_len++; - if (raw_suffix_len > raw_prefix_len) + if (prefix[phase] != c) + phase = PHASE_NONE; + else if (unsigned (phase + 1) == prefix_len) break; - if (raw_suffix_len == temp_buffer_len) - continue; - phase = RAW_STR; - /* Nothing should be appended to temp_buffer during - RAW_STR phase. */ - temp_buffer_len = 17; - } - if (c == ')') - { - phase = RAW_STR_SUFFIX; - raw_suffix_len = 0; - temp_buffer_len = 0; + else + { + phase = Phase (phase + 1); + continue; + } } - else if (c == '\n') + + if (!prefix_len && c == '"') + /* Failure mode lexing. */ + goto out; + else if (prefix_len && c == ')') + phase = PHASE_SUFFIX; + else if (!read_note && c == '\n') { + pos--; + pfile->buffer->cur = pos; if (pfile->state.in_directive || (pfile->state.parsing_args && pfile->buffer->next_line >= pfile->buffer->rlimit)) { - cur--; - type = CPP_OTHER; cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0, "unterminated raw string"); - break; + type = CPP_OTHER; + goto out; } - BUF_APPEND (base, cur - base); - - pfile->buffer->cur = cur-1; + accum.append (pfile, base, pos - base + 1); _cpp_process_line_notes (pfile, false); if (pfile->buffer->next_line < pfile->buffer->rlimit) @@ -1906,13 +1929,14 @@ lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base, if (!_cpp_get_fresh_line (pfile)) { + /* We ran out of file and failed to get a line. */ location_t src_loc = token->src_loc; token->type = CPP_EOF; /* Tell the compiler the line number of the EOF token. 
*/ token->src_loc = pfile->line_table->highest_line; token->flags = BOL; - if (first_buff != NULL) - _cpp_release_buff (pfile, first_buff); + if (accum.first) + _cpp_release_buff (pfile, accum.first); cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0, "unterminated raw string"); /* Now pop the buffer that _cpp_get_fresh_line did not. */ @@ -1920,7 +1944,7 @@ lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base, return; } - cur = base = pfile->buffer->cur; + pos = base = pfile->buffer->cur; note = &pfile->buffer->notes[pfile->buffer->cur_note]; } } @@ -1930,7 +1954,7 @@ lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base, /* If a string format macro, say from inttypes.h, is placed touching a string literal it could be parsed as a C++11 user-defined string literal thus breaking the program. */ - if (is_macro_not_literal_suffix (pfile, cur)) + if (is_macro_not_literal_suffix (pfile, pos)) { /* Raise a warning, but do not consume subsequent tokens. */ if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping) @@ -1940,37 +1964,37 @@ lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base, "a space between literal and string macro"); } /* Grab user defined literal suffix. 
*/ - else if (ISIDST (*cur)) + else if (ISIDST (*pos)) { type = cpp_userdef_string_add_type (type); - ++cur; + ++pos; - while (ISIDNUM (*cur)) - ++cur; + while (ISIDNUM (*pos)) + ++pos; } } - pfile->buffer->cur = cur; - if (first_buff == NULL) - create_literal (pfile, token, base, cur - base, type); + out: + pfile->buffer->cur = pos; + if (!accum.accum) + create_literal (pfile, token, base, pos - base, type); else { - uchar *dest = _cpp_unaligned_alloc (pfile, total_len + (cur - base) + 1); + size_t extra_len = pos - base; + uchar *dest = _cpp_unaligned_alloc (pfile, accum.accum + extra_len + 1); token->type = type; - token->val.str.len = total_len + (cur - base); + token->val.str.len = accum.accum + extra_len; token->val.str.text = dest; - last_buff = first_buff; - while (last_buff != NULL) + for (_cpp_buff *buf = accum.first; buf; buf = buf->next) { - memcpy (dest, last_buff->base, - BUFF_FRONT (last_buff) - last_buff->base); - dest += BUFF_FRONT (last_buff) - last_buff->base; - last_buff = last_buff->next; + size_t len = BUFF_FRONT (buf) - buf->base; + memcpy (dest, buf->base, len); + dest += len; } - _cpp_release_buff (pfile, first_buff); - memcpy (dest, base, cur - base); - dest[cur - base] = '\0'; + _cpp_release_buff (pfile, accum.first); + memcpy (dest, base, extra_len); + dest[extra_len] = '\0'; } } @@ -2003,7 +2027,7 @@ lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base) } if (terminator == 'R') { - lex_raw_string (pfile, token, base, cur); + lex_raw_string (pfile, token, base); return; } if (terminator == '"') |