From a3998c2fb1630638db83defcd2c680111d65b973 Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Thu, 15 Dec 2016 18:05:05 +0000 Subject: Fix use-after-free lexing unterminated raw strings (PR preprocessor/78811) gcc/ChangeLog: PR preprocessor/78680 PR preprocessor/78811 * input.c (struct selftest::lexer_test): Add field m_implicitly_expect_EOF. (selftest::lexer_error_sink): New class. (selftest::lexer_error_sink::s_singleton): New global. (selftest::lexer_test::lexer_test): Initialize new field "m_implicitly_expect_EOF". (selftest::lexer_test::~lexer_test): Conditionalize the check for the EOF token on the new field. (selftest::test_lexer_string_locations_raw_string_unterminated): New function. (selftest::input_c_tests): Call the new test. libcpp/ChangeLog: PR preprocessor/78680 PR preprocessor/78811 * lex.c (_cpp_lex_direct): Only determine the end-location of the token and build a range for non-reserved start locations. Do not do it for EOF tokens. From-SVN: r243721 --- gcc/input.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 84 insertions(+), 5 deletions(-) (limited to 'gcc/input.c') diff --git a/gcc/input.c b/gcc/input.c index 1c7228a..dcb5101 100644 --- a/gcc/input.c +++ b/gcc/input.c @@ -1985,6 +1985,7 @@ struct lexer_test cpp_reader_ptr m_parser; temp_source_file m_tempfile; string_concat_db m_concats; + bool m_implicitly_expect_EOF; }; /* Use an EBCDIC encoding for the execution charset, specifically @@ -2046,6 +2047,54 @@ class ebcdic_execution_charset : public lexer_test_options ebcdic_execution_charset *ebcdic_execution_charset::s_singleton; +/* A lexer_test_options subclass that records a list of error + messages emitted by the lexer. 
*/ + +class lexer_error_sink : public lexer_test_options +{ + public: + lexer_error_sink () + { + gcc_assert (s_singleton == NULL); + s_singleton = this; + } + ~lexer_error_sink () + { + gcc_assert (s_singleton == this); + s_singleton = NULL; + + int i; + char *str; + FOR_EACH_VEC_ELT (m_errors, i, str) + free (str); + } + + void apply (lexer_test &test) FINAL OVERRIDE + { + cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser); + callbacks->error = on_error; + } + + static bool on_error (cpp_reader *pfile ATTRIBUTE_UNUSED, + int level ATTRIBUTE_UNUSED, + int reason ATTRIBUTE_UNUSED, + rich_location *richloc ATTRIBUTE_UNUSED, + const char *msgid, va_list *ap) + ATTRIBUTE_FPTR_PRINTF(5,0) + { + char *msg = xvasprintf (msgid, *ap); + s_singleton->m_errors.safe_push (msg); + return true; + } + + auto_vec<char *> m_errors; + + private: + static lexer_error_sink *s_singleton; +}; + +lexer_error_sink *lexer_error_sink::s_singleton; + /* Constructor. Override line_table with a new instance based on CASE_, and write CONTENT to a tempfile. Create a cpp_reader, and use it to start parsing the tempfile. */ @@ -2056,7 +2105,8 @@ lexer_test::lexer_test (const line_table_case &case_, const char *content, m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)), /* Create a tempfile and write the text to it. */ m_tempfile (SELFTEST_LOCATION, ".c", content), - m_concats () + m_concats (), + m_implicitly_expect_EOF (true) { if (options) options->apply (*this); @@ -2069,16 +2119,19 @@ lexer_test::lexer_test (const line_table_case &case_, const char *content, ASSERT_NE (fname, NULL); } -/* Destructor. Verify that the next token in m_parser is EOF. */ +/* Destructor. By default, verify that the next token in m_parser is EOF. 
*/ lexer_test::~lexer_test () { location_t loc; const cpp_token *tok; - tok = cpp_get_token_with_location (m_parser, &loc); - ASSERT_NE (tok, NULL); - ASSERT_EQ (tok->type, CPP_EOF); + if (m_implicitly_expect_EOF) + { + tok = cpp_get_token_with_location (m_parser, &loc); + ASSERT_NE (tok, NULL); + ASSERT_EQ (tok->type, CPP_EOF); + } } /* Get the next token from m_parser. */ @@ -3247,6 +3300,31 @@ test_lexer_string_locations_raw_string_multiline (const line_table_case &case_) "range endpoints are on different lines"); } +/* Test of parsing an unterminated raw string. */ + +static void +test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_) +{ + const char *content = "R\"ouch()ouCh\" /* etc */"; + + lexer_error_sink errors; + lexer_test test (case_, content, &errors); + test.m_implicitly_expect_EOF = false; + + /* Attempt to parse the raw string. */ + const cpp_token *tok = test.get_token (); + ASSERT_EQ (tok->type, CPP_EOF); + + ASSERT_EQ (1, errors.m_errors.length ()); + /* We expect the message "unterminated raw string" + in the "cpplib" translation domain. + It's not clear that dgettext is available on all supported hosts, + so this assertion is commented-out for now. + ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"), + errors.m_errors[0]); + */ +} + /* Test of lexing char constants. */ static void @@ -3390,6 +3468,7 @@ input_c_tests () for_each_line_table_case (test_lexer_string_locations_long_line); for_each_line_table_case (test_lexer_string_locations_raw_string_one_line); for_each_line_table_case (test_lexer_string_locations_raw_string_multiline); + for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated); for_each_line_table_case (test_lexer_char_constants); test_reading_source_line (); -- cgit v1.1