aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorbors[bot] <26634292+bors[bot]@users.noreply.github.com>2021-08-08 21:57:02 +0000
committerGitHub <noreply@github.com>2021-08-08 21:57:02 +0000
commitfa2aec10e60592348e1640b5bda0507b1324f235 (patch)
tree9d02fcb274469700637cdbde6e54e0fe81ff03e4
parent6bd42365fa2f46430fe933deaecd879857415fc4 (diff)
parent007e6ecefb0b43d0b9e7bf85f75ec050b5c520e5 (diff)
downloadgcc-fa2aec10e60592348e1640b5bda0507b1324f235.zip
gcc-fa2aec10e60592348e1640b5bda0507b1324f235.tar.gz
gcc-fa2aec10e60592348e1640b5bda0507b1324f235.tar.bz2
Merge #615
615: lex: accept zero codepoints in strings r=philberty a=dkm From Mark Wielaard: https://gcc.gnu.org/pipermail/gcc-rust/2021-August/000127.html > > Zero characters (codepoints) are acceptable in strings. The current > Lexer::parse_string skipped such zero codepoints by accident. The > zero codepoint was also used as an error/skip indicator, but that is only > true if the third element of utf8_escape_pair is true (yes, it is > called pair, but it is a triple). > > Add a testcase that checks the (sub)strings are separated by zero > chars. Since we cannot slice strings yet this uses extern "C" > functions, printf and memchr. Co-authored-by: Mark Wielaard <mark@klomp.org>
-rw-r--r-- gcc/rust/lex/rust-lex.cc 3
-rw-r--r-- gcc/testsuite/rust/execute/torture/str-zero.rs 26
2 files changed, 28 insertions, 1 deletions
diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc
index 0b8a8ea..49b6b6d 100644
--- a/gcc/rust/lex/rust-lex.cc
+++ b/gcc/rust/lex/rust-lex.cc
@@ -1827,7 +1827,8 @@ Lexer::parse_string (Location loc)
else
length += std::get<1> (utf8_escape_pair);
- if (current_char32 != Codepoint (0))
+ if (current_char32 != Codepoint (0)
+ || !std::get<2> (utf8_escape_pair))
str += current_char32;
// required as parsing utf8 escape only changes current_char
diff --git a/gcc/testsuite/rust/execute/torture/str-zero.rs b/gcc/testsuite/rust/execute/torture/str-zero.rs
new file mode 100644
index 0000000..e7fba0d
--- /dev/null
+++ b/gcc/testsuite/rust/execute/torture/str-zero.rs
@@ -0,0 +1,26 @@
+/* { dg-output "bar foo baz foobar\n" } */
+extern "C"
+{
+ fn printf(s: *const i8, ...);
+ fn memchr(s: *const i8, c: u8, n: usize) -> *const i8;
+}
+
+pub fn main () -> i32
+{
+ let f = "%s %s %s %s\n\0";
+ let s = "bar\0\
+ foo\
+ \x00\
+ baz\u{0000}\
+ foobar\0";
+ let cf = f as *const str as *const i8;
+ let cs = s as *const str as *const i8;
+ unsafe
+ {
+ let cs2 = memchr (cs, b'f', 5);
+ let cs3 = memchr (cs2, b'b', 5);
+ let cs4 = memchr (cs3, b'f', 5);
+ printf (cf, cs, cs2, cs3, cs4);
+ }
+ 0
+}