diff options
author | Owen Avery <powerboat9.gamer@gmail.com> | 2023-01-08 17:19:12 -0500 |
---|---|---|
committer | Philip Herron <philip.herron@embecosm.com> | 2023-02-14 22:00:59 +0000 |
commit | 92d6dc8443acbd273333cab411bfd68eaca56a89 (patch) | |
tree | a490a24c931e81646fb7c7e0e22ba3e887b0f2d5 /gcc/rust | |
parent | 665e53cc3e244c9b9fd1e919d7f75576d4198be0 (diff) | |
download | gcc-92d6dc8443acbd273333cab411bfd68eaca56a89.zip gcc-92d6dc8443acbd273333cab411bfd68eaca56a89.tar.gz gcc-92d6dc8443acbd273333cab411bfd68eaca56a89.tar.bz2 |
Implemented UTF-8 checking for include_str!()
gcc/rust/ChangeLog:
* expand/rust-macro-builtins.cc
(MacroBuiltin::include_str_handler): Add check for valid UTF-8.
gcc/testsuite/ChangeLog:
* rust/compile/builtin_macro_include_str.rs:
Include test of invalid UTF-8.
* rust/compile/invalid_utf8: File with invalid UTF-8.
Signed-off-by: Owen Avery <powerboat9.gamer@gmail.com>
Diffstat (limited to 'gcc/rust')
-rw-r--r-- | gcc/rust/expand/rust-macro-builtins.cc | 51 |
1 files changed, 49 insertions, 2 deletions
diff --git a/gcc/rust/expand/rust-macro-builtins.cc b/gcc/rust/expand/rust-macro-builtins.cc index e594a25..3b6f69b 100644 --- a/gcc/rust/expand/rust-macro-builtins.cc +++ b/gcc/rust/expand/rust-macro-builtins.cc @@ -389,8 +389,55 @@ MacroBuiltin::include_str_handler (Location invoc_locus, std::vector<uint8_t> bytes = load_file_bytes (target_filename.c_str ()); - /* FIXME: Enforce that the file contents are valid UTF-8. */ - std::string str ((const char *) &bytes[0], bytes.size ()); + /* FIXME: reuse lexer */ + int expect_single = 0; + for (uint8_t b : bytes) + { + if (expect_single) + { + if ((b & 0xC0) != 0x80) + /* character was truncated, exit with expect_single != 0 */ + break; + expect_single--; + } + else if (b & 0x80) + { + if (b >= 0xF8) + { + /* more than 4 leading 1s */ + expect_single = 1; + break; + } + else if (b >= 0xF0) + { + /* 4 leading 1s */ + expect_single = 3; + } + else if (b >= 0xE0) + { + /* 3 leading 1s */ + expect_single = 2; + } + else if (b >= 0xC0) + { + /* 2 leading 1s */ + expect_single = 1; + } + else + { + /* only 1 leading 1 */ + expect_single = 1; + break; + } + } + } + + std::string str; + if (expect_single) + rust_error_at (invoc_locus, "%s was not a valid utf-8 file", + target_filename.c_str ()); + else + str = std::string ((const char *) &bytes[0], bytes.size ()); auto node = AST::SingleASTNode (make_string (invoc_locus, str)); auto str_tok = make_token (Token::make_string (invoc_locus, std::move (str))); |