aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: David Malcolm <dmalcolm@redhat.com> 2016-12-15 18:05:05 +0000
committer: David Malcolm <dmalcolm@gcc.gnu.org> 2016-12-15 18:05:05 +0000
commit: a3998c2fb1630638db83defcd2c680111d65b973 (patch)
tree: 6691b9007e679032843ddb4ba453fa49409bf3d2
parent: a3038e190b271744d0b5e2e83d389864fed4d54e (diff)
download: gcc-a3998c2fb1630638db83defcd2c680111d65b973.zip
gcc-a3998c2fb1630638db83defcd2c680111d65b973.tar.gz
gcc-a3998c2fb1630638db83defcd2c680111d65b973.tar.bz2
Fix use-after-free lexing unterminated raw strings (PR preprocessor/78811)
gcc/ChangeLog:
	PR preprocessor/78680
	PR preprocessor/78811
	* input.c (struct selftest::lexer_test): Add field
	m_implicitly_expect_EOF.
	(selftest::lexer_error_sink): New class.
	(selftest::lexer_error_sink::s_singleton): New global.
	(selftest::lexer_test::lexer_test): Initialize new field
	"m_implicitly_expect_EOF".
	(selftest::lexer_test::~lexer_test): Conditionalize the
	check for the EOF token on the new field.
	(selftest::test_lexer_string_locations_raw_string_unterminated):
	New function.
	(selftest::input_c_tests): Call the new test.

libcpp/ChangeLog:
	PR preprocessor/78680
	PR preprocessor/78811
	* lex.c (_cpp_lex_direct): Only determine the end-location of
	the token and build a range for non-reserved start locations.
	Do not do it for EOF tokens.

From-SVN: r243721
-rw-r--r-- gcc/ChangeLog 16
-rw-r--r-- gcc/input.c 89
-rw-r--r-- libcpp/ChangeLog 8
-rw-r--r-- libcpp/lex.c 40
4 files changed, 129 insertions, 24 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 0d31d0d..f8b7fed 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,19 @@
+2016-12-15 David Malcolm <dmalcolm@redhat.com>
+
+ PR preprocessor/78680
+ PR preprocessor/78811
+ * input.c (struct selftest::lexer_test): Add field
+ m_implicitly_expect_EOF.
+ (selftest::lexer_error_sink): New class.
+ (selftest::lexer_error_sink::s_singleton): New global.
+ (selftest::lexer_test::lexer_test): Initialize new field
+ "m_implicitly_expect_EOF".
+ (selftest::lexer_test::~lexer_test): Conditionalize the
+ check for the EOF token on the new field.
+ (selftest::test_lexer_string_locations_raw_string_unterminated):
+ New function.
+ (selftest::input_c_tests): Call the new test.
+
2016-12-15 Wilco Dijkstra <wdijkstr@arm.com>
* config/arm/arm.h (TARGET_BACKTRACE): Use crtl->is_leaf.
diff --git a/gcc/input.c b/gcc/input.c
index 1c7228a..dcb5101 100644
--- a/gcc/input.c
+++ b/gcc/input.c
@@ -1985,6 +1985,7 @@ struct lexer_test
cpp_reader_ptr m_parser;
temp_source_file m_tempfile;
string_concat_db m_concats;
+ bool m_implicitly_expect_EOF;
};
/* Use an EBCDIC encoding for the execution charset, specifically
@@ -2046,6 +2047,54 @@ class ebcdic_execution_charset : public lexer_test_options
ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
+/* A lexer_test_options subclass that records a list of error
+ messages emitted by the lexer. */
+
+class lexer_error_sink : public lexer_test_options
+{
+ public:
+ lexer_error_sink ()
+ {
+ gcc_assert (s_singleton == NULL);
+ s_singleton = this;
+ }
+ ~lexer_error_sink ()
+ {
+ gcc_assert (s_singleton == this);
+ s_singleton = NULL;
+
+ int i;
+ char *str;
+ FOR_EACH_VEC_ELT (m_errors, i, str)
+ free (str);
+ }
+
+ void apply (lexer_test &test) FINAL OVERRIDE
+ {
+ cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
+ callbacks->error = on_error;
+ }
+
+ static bool on_error (cpp_reader *pfile ATTRIBUTE_UNUSED,
+ int level ATTRIBUTE_UNUSED,
+ int reason ATTRIBUTE_UNUSED,
+ rich_location *richloc ATTRIBUTE_UNUSED,
+ const char *msgid, va_list *ap)
+ ATTRIBUTE_FPTR_PRINTF(5,0)
+ {
+ char *msg = xvasprintf (msgid, *ap);
+ s_singleton->m_errors.safe_push (msg);
+ return true;
+ }
+
+ auto_vec<char *> m_errors;
+
+ private:
+ static lexer_error_sink *s_singleton;
+};
+
+lexer_error_sink *lexer_error_sink::s_singleton;
+
/* Constructor. Override line_table with a new instance based on CASE_,
and write CONTENT to a tempfile. Create a cpp_reader, and use it to
start parsing the tempfile. */
@@ -2056,7 +2105,8 @@ lexer_test::lexer_test (const line_table_case &case_, const char *content,
m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
/* Create a tempfile and write the text to it. */
m_tempfile (SELFTEST_LOCATION, ".c", content),
- m_concats ()
+ m_concats (),
+ m_implicitly_expect_EOF (true)
{
if (options)
options->apply (*this);
@@ -2069,16 +2119,19 @@ lexer_test::lexer_test (const line_table_case &case_, const char *content,
ASSERT_NE (fname, NULL);
}
-/* Destructor. Verify that the next token in m_parser is EOF. */
+/* Destructor. By default, verify that the next token in m_parser is EOF. */
lexer_test::~lexer_test ()
{
location_t loc;
const cpp_token *tok;
- tok = cpp_get_token_with_location (m_parser, &loc);
- ASSERT_NE (tok, NULL);
- ASSERT_EQ (tok->type, CPP_EOF);
+ if (m_implicitly_expect_EOF)
+ {
+ tok = cpp_get_token_with_location (m_parser, &loc);
+ ASSERT_NE (tok, NULL);
+ ASSERT_EQ (tok->type, CPP_EOF);
+ }
}
/* Get the next token from m_parser. */
@@ -3247,6 +3300,31 @@ test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
"range endpoints are on different lines");
}
+/* Test of parsing an unterminated raw string. */
+
+static void
+test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
+{
+ const char *content = "R\"ouch()ouCh\" /* etc */";
+
+ lexer_error_sink errors;
+ lexer_test test (case_, content, &errors);
+ test.m_implicitly_expect_EOF = false;
+
+ /* Attempt to parse the raw string. */
+ const cpp_token *tok = test.get_token ();
+ ASSERT_EQ (tok->type, CPP_EOF);
+
+ ASSERT_EQ (1, errors.m_errors.length ());
+ /* We expect the message "unterminated raw string"
+ in the "cpplib" translation domain.
+ It's not clear that dgettext is available on all supported hosts,
+ so this assertion is commented-out for now.
+ ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
+ errors.m_errors[0]);
+ */
+}
+
/* Test of lexing char constants. */
static void
@@ -3390,6 +3468,7 @@ input_c_tests ()
for_each_line_table_case (test_lexer_string_locations_long_line);
for_each_line_table_case (test_lexer_string_locations_raw_string_one_line);
for_each_line_table_case (test_lexer_string_locations_raw_string_multiline);
+ for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated);
for_each_line_table_case (test_lexer_char_constants);
test_reading_source_line ();
diff --git a/libcpp/ChangeLog b/libcpp/ChangeLog
index 6368122..ba90cfe 100644
--- a/libcpp/ChangeLog
+++ b/libcpp/ChangeLog
@@ -1,3 +1,11 @@
+2016-12-15 David Malcolm <dmalcolm@redhat.com>
+
+ PR preprocessor/78680
+ PR preprocessor/78811
+ * lex.c (_cpp_lex_direct): Only determine the end-location of
+ the token and build a range for non-reserved start locations.
+ Do not do it for EOF tokens.
+
2016-12-12 David Malcolm <dmalcolm@redhat.com>
PR preprocessor/78680
diff --git a/libcpp/lex.c b/libcpp/lex.c
index ae45892..9b1bdf8 100644
--- a/libcpp/lex.c
+++ b/libcpp/lex.c
@@ -3089,25 +3089,27 @@ _cpp_lex_direct (cpp_reader *pfile)
break;
}
- /* Ensure that any line notes are processed, so that we have the
- correct physical line/column for the end-point of the token even
- when a logical line is split via one or more backslashes. */
- if (buffer->cur >= buffer->notes[buffer->cur_note].pos
- && !pfile->overlaid_buffer)
- _cpp_process_line_notes (pfile, false);
-
- source_range tok_range;
- tok_range.m_start = result->src_loc;
- if (result->src_loc >= RESERVED_LOCATION_COUNT)
- tok_range.m_finish
- = linemap_position_for_column (pfile->line_table,
- CPP_BUF_COLUMN (buffer, buffer->cur));
- else
- tok_range.m_finish = tok_range.m_start;
-
- result->src_loc = COMBINE_LOCATION_DATA (pfile->line_table,
- result->src_loc,
- tok_range, NULL);
+ /* Potentially convert the location of the token to a range. */
+ if (result->src_loc >= RESERVED_LOCATION_COUNT
+ && result->type != CPP_EOF)
+ {
+ /* Ensure that any line notes are processed, so that we have the
+ correct physical line/column for the end-point of the token even
+ when a logical line is split via one or more backslashes. */
+ if (buffer->cur >= buffer->notes[buffer->cur_note].pos
+ && !pfile->overlaid_buffer)
+ _cpp_process_line_notes (pfile, false);
+
+ source_range tok_range;
+ tok_range.m_start = result->src_loc;
+ tok_range.m_finish
+ = linemap_position_for_column (pfile->line_table,
+ CPP_BUF_COLUMN (buffer, buffer->cur));
+
+ result->src_loc = COMBINE_LOCATION_DATA (pfile->line_table,
+ result->src_loc,
+ tok_range, NULL);
+ }
return result;
}