From 007e6ecefb0b43d0b9e7bf85f75ec050b5c520e5 Mon Sep 17 00:00:00 2001 From: Mark Wielaard Date: Sat, 7 Aug 2021 17:32:41 +0200 Subject: lex: accept zero codepoints in strings Zero characters (codepoints) are acceptable in strings. The current Lexer::parse_string skipped such zero codepoints by accident. The zero codepoint was also used as an error/skip indicator, but that is only true if the third element of utf8_escape_pair is true (yes, it is called pair, but it is a triple). Add a testcase that checks the (sub)strings are separated by zero chars. Since we cannot slice strings yet this uses extern "C" functions, printf and memchr. --- gcc/rust/lex/rust-lex.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'gcc/rust') diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc index 0b8a8ea..49b6b6d 100644 --- a/gcc/rust/lex/rust-lex.cc +++ b/gcc/rust/lex/rust-lex.cc @@ -1827,7 +1827,8 @@ Lexer::parse_string (Location loc) else length += std::get<1> (utf8_escape_pair); - if (current_char32 != Codepoint (0)) + if (current_char32 != Codepoint (0) + || !std::get<2> (utf8_escape_pair)) str += current_char32; // required as parsing utf8 escape only changes current_char -- cgit v1.1