about | summary | refs | log | tree | commit | diff
path: root/gcc
diff options
context:
space:
mode:
author: Owen Avery <powerboat9.gamer@gmail.com> 2023-01-08 17:19:12 -0500
committer: Arthur Cohen <arthur.cohen@embecosm.com> 2023-04-06 10:47:23 +0200
commit: fd406fc7046f427385b644759265ae06ed741d6b (patch)
tree: f33b42bc2dfb10af4854c6fc95dbadfcc83ff488 /gcc
parent: 20529dff93f10f782b9dbee51fa58928945d0be3 (diff)
download: gcc-fd406fc7046f427385b644759265ae06ed741d6b.zip
gcc-fd406fc7046f427385b644759265ae06ed741d6b.tar.gz
gcc-fd406fc7046f427385b644759265ae06ed741d6b.tar.bz2
gccrs: Implemented UTF-8 checking for include_str!()

gcc/rust/ChangeLog:

	* expand/rust-macro-builtins.cc (MacroBuiltin::include_str_handler): Add check for valid UTF-8.

gcc/testsuite/ChangeLog:

	* rust/compile/builtin_macro_include_str.rs: Include test of invalid UTF-8.
	* rust/compile/invalid_utf8: File with invalid UTF-8.

Signed-off-by: Owen Avery <powerboat9.gamer@gmail.com>

Diffstat (limited to 'gcc')
-rw-r--r-- gcc/rust/expand/rust-macro-builtins.cc 51
-rw-r--r-- gcc/testsuite/rust/compile/builtin_macro_include_str.rs 1
-rw-r--r-- gcc/testsuite/rust/compile/invalid_utf8 1
3 files changed, 51 insertions, 2 deletions
diff --git a/gcc/rust/expand/rust-macro-builtins.cc b/gcc/rust/expand/rust-macro-builtins.cc
index e594a25..3b6f69b 100644
--- a/gcc/rust/expand/rust-macro-builtins.cc
+++ b/gcc/rust/expand/rust-macro-builtins.cc
@@ -389,8 +389,55 @@ MacroBuiltin::include_str_handler (Location invoc_locus,
std::vector<uint8_t> bytes = load_file_bytes (target_filename.c_str ());
- /* FIXME: Enforce that the file contents are valid UTF-8. */
- std::string str ((const char *) &bytes[0], bytes.size ());
+ /* FIXME: reuse lexer */
+ int expect_single = 0;
+ for (uint8_t b : bytes)
+ {
+ if (expect_single)
+ {
+ if ((b & 0xC0) != 0x80)
+ /* character was truncated, exit with expect_single != 0 */
+ break;
+ expect_single--;
+ }
+ else if (b & 0x80)
+ {
+ if (b >= 0xF8)
+ {
+ /* more than 4 leading 1s */
+ expect_single = 1;
+ break;
+ }
+ else if (b >= 0xF0)
+ {
+ /* 4 leading 1s */
+ expect_single = 3;
+ }
+ else if (b >= 0xE0)
+ {
+ /* 3 leading 1s */
+ expect_single = 2;
+ }
+ else if (b >= 0xC0)
+ {
+ /* 2 leading 1s */
+ expect_single = 1;
+ }
+ else
+ {
+ /* only 1 leading 1 */
+ expect_single = 1;
+ break;
+ }
+ }
+ }
+
+ std::string str;
+ if (expect_single)
+ rust_error_at (invoc_locus, "%s was not a valid utf-8 file",
+ target_filename.c_str ());
+ else
+ str = std::string ((const char *) &bytes[0], bytes.size ());
auto node = AST::SingleASTNode (make_string (invoc_locus, str));
auto str_tok = make_token (Token::make_string (invoc_locus, std::move (str)));
diff --git a/gcc/testsuite/rust/compile/builtin_macro_include_str.rs b/gcc/testsuite/rust/compile/builtin_macro_include_str.rs
index 38f5e3b..8092193 100644
--- a/gcc/testsuite/rust/compile/builtin_macro_include_str.rs
+++ b/gcc/testsuite/rust/compile/builtin_macro_include_str.rs
@@ -10,4 +10,5 @@ fn main () {
include_str! ("foo.txt", "bar.txt"); // { dg-error "macro takes 1 argument" "" }
include_str! ("builtin_macro_include_str.rs"); // ok
include_str! ("builtin_macro_include_str.rs",); // trailing comma ok
+ include_str! ("invalid_utf8"); // { dg-error "invalid_utf8 was not a valid utf-8 file" "" }
}
diff --git a/gcc/testsuite/rust/compile/invalid_utf8 b/gcc/testsuite/rust/compile/invalid_utf8
new file mode 100644
index 0000000..29e181e
--- /dev/null
+++ b/gcc/testsuite/rust/compile/invalid_utf8
@@ -0,0 +1 @@
+ÿ