diff options
author | bors[bot] <26634292+bors[bot]@users.noreply.github.com> | 2021-08-08 21:57:02 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-08-08 21:57:02 +0000 |
commit | fa2aec10e60592348e1640b5bda0507b1324f235 (patch) | |
tree | 9d02fcb274469700637cdbde6e54e0fe81ff03e4 | |
parent | 6bd42365fa2f46430fe933deaecd879857415fc4 (diff) | |
parent | 007e6ecefb0b43d0b9e7bf85f75ec050b5c520e5 (diff) | |
download | gcc-fa2aec10e60592348e1640b5bda0507b1324f235.zip gcc-fa2aec10e60592348e1640b5bda0507b1324f235.tar.gz gcc-fa2aec10e60592348e1640b5bda0507b1324f235.tar.bz2 |
Merge #615
615: lex: accept zero codepoints in strings r=philberty a=dkm
From Mark Wielaard : https://gcc.gnu.org/pipermail/gcc-rust/2021-August/000127.html
>
> Zero characters (codepoints) are acceptable in strings. The current
> Lexer::parse_string skipped such zero codepoints by accident. The
> zero codepoint was also used as error/skip indicator, but that is only
> true if the third argument of utf8_escape_pair is true (yes, it is
> called pair, but is a triple).
>
> Add a testcase that checks the (sub)strings are separated by zero
> chars. Since we cannot slice strings yet this uses extern "C"
> functions, printf and memchr.
Co-authored-by: Mark Wielaard <mark@klomp.org>
-rw-r--r-- | gcc/rust/lex/rust-lex.cc | 3 | ||||
-rw-r--r-- | gcc/testsuite/rust/execute/torture/str-zero.rs | 26 |
2 files changed, 28 insertions, 1 deletions
diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc index 0b8a8ea..49b6b6d 100644 --- a/gcc/rust/lex/rust-lex.cc +++ b/gcc/rust/lex/rust-lex.cc @@ -1827,7 +1827,8 @@ Lexer::parse_string (Location loc) else length += std::get<1> (utf8_escape_pair); - if (current_char32 != Codepoint (0)) + if (current_char32 != Codepoint (0) + || !std::get<2> (utf8_escape_pair)) str += current_char32; // required as parsing utf8 escape only changes current_char diff --git a/gcc/testsuite/rust/execute/torture/str-zero.rs b/gcc/testsuite/rust/execute/torture/str-zero.rs new file mode 100644 index 0000000..e7fba0d --- /dev/null +++ b/gcc/testsuite/rust/execute/torture/str-zero.rs @@ -0,0 +1,26 @@ +/* { dg-output "bar foo baz foobar\n" } */ +extern "C" +{ + fn printf(s: *const i8, ...); + fn memchr(s: *const i8, c: u8, n: usize) -> *const i8; +} + +pub fn main () -> i32 +{ + let f = "%s %s %s %s\n\0"; + let s = "bar\0\ + foo\ + \x00\ + baz\u{0000}\ + foobar\0"; + let cf = f as *const str as *const i8; + let cs = s as *const str as *const i8; + unsafe + { + let cs2 = memchr (cs, b'f', 5); + let cs3 = memchr (cs2, b'b', 5); + let cs4 = memchr (cs3, b'f', 5); + printf (cf, cs, cs2, cs3, cs4); + } + 0 +} |