aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
Diffstat (limited to 'gcc')
-rw-r--r--gcc/rust/lex/rust-codepoint.h2
-rw-r--r--gcc/rust/lex/rust-lex.h8
-rw-r--r--gcc/rust/rust-session-manager.cc34
-rw-r--r--gcc/rust/util/rust-unicode.cc23
-rw-r--r--gcc/rust/util/rust-unicode.h19
-rw-r--r--gcc/testsuite/rust/compile/bad-crate-name1.rs (renamed from gcc/testsuite/rust/compile/bad-crate-name.rs)0
-rw-r--r--gcc/testsuite/rust/compile/bad-crate-name2.rs2
7 files changed, 59 insertions, 29 deletions
diff --git a/gcc/rust/lex/rust-codepoint.h b/gcc/rust/lex/rust-codepoint.h
index e2d0571..755c837 100644
--- a/gcc/rust/lex/rust-codepoint.h
+++ b/gcc/rust/lex/rust-codepoint.h
@@ -22,6 +22,8 @@
#include "rust-system.h"
namespace Rust {
+
+// FIXME: move this to rust-unicode.h?
struct Codepoint
{
uint32_t value;
diff --git a/gcc/rust/lex/rust-lex.h b/gcc/rust/lex/rust-lex.h
index 27286ac..91e814b 100644
--- a/gcc/rust/lex/rust-lex.h
+++ b/gcc/rust/lex/rust-lex.h
@@ -334,6 +334,14 @@ public:
return c;
}
}
+
+ tl::optional<std::vector<Codepoint>> get_chars ()
+ {
+ if (is_valid ())
+ return {chars};
+ else
+ return tl::nullopt;
+ }
};
class FileInputSource : public InputSource
diff --git a/gcc/rust/rust-session-manager.cc b/gcc/rust/rust-session-manager.cc
index 4f779d1..3461198 100644
--- a/gcc/rust/rust-session-manager.cc
+++ b/gcc/rust/rust-session-manager.cc
@@ -42,6 +42,7 @@
#include "rust-early-name-resolver.h"
#include "rust-cfg-strip.h"
#include "rust-expand-visitor.h"
+#include "rust-unicode.h"
#include "diagnostic.h"
#include "input.h"
@@ -107,30 +108,39 @@ infer_crate_name (const std::string &filename)
return crate;
}
-/* Validate the crate name using the ASCII rules
- TODO: Support Unicode version of the rules */
+/* Validate the crate name using the Unicode rules */
static bool
validate_crate_name (const std::string &crate_name, Error &error)
{
- if (crate_name.empty ())
+ Utf8String utf8_name = {crate_name};
+ tl::optional<std::vector<Codepoint>> uchars_opt = utf8_name.get_chars ();
+
+ if (!uchars_opt.has_value ())
+ {
+ error = Error (UNDEF_LOCATION, "crate name is not a valid UTF-8 string");
+ return false;
+ }
+
+ std::vector<Codepoint> uchars = uchars_opt.value ();
+ if (uchars.empty ())
{
error = Error (UNDEF_LOCATION, "crate name cannot be empty");
return false;
}
- if (crate_name.length () > kMaxNameLength)
+ if (uchars.size () > kMaxNameLength)
{
error = Error (UNDEF_LOCATION, "crate name cannot exceed %lu characters",
(unsigned long) kMaxNameLength);
return false;
}
- for (auto &c : crate_name)
+ for (Codepoint &c : uchars)
{
- if (!(ISALNUM (c) || c == '_'))
+ if (!(is_alphabetic (c.value) || is_numeric (c.value) || c.value == '_'))
{
error = Error (UNDEF_LOCATION,
- "invalid character %<%c%> in crate name: %<%s%>", c,
- crate_name.c_str ());
+ "invalid character %<%s%> in crate name: %<%s%>",
+ c.as_string ().c_str (), crate_name.c_str ());
return false;
}
}
@@ -1273,13 +1283,17 @@ rust_crate_name_validation_test (void)
ASSERT_TRUE (Rust::validate_crate_name ("example", error));
ASSERT_TRUE (Rust::validate_crate_name ("abcdefg_1234", error));
ASSERT_TRUE (Rust::validate_crate_name ("1", error));
- // FIXME: The next test does not pass as of current implementation
- // ASSERT_TRUE (Rust::CompileOptions::validate_crate_name ("惊吓"));
+ ASSERT_TRUE (Rust::validate_crate_name ("クレート", error));
+ ASSERT_TRUE (Rust::validate_crate_name ("Sōkrátēs", error));
+ ASSERT_TRUE (Rust::validate_crate_name ("惊吓", error));
+
// NOTE: - is not allowed in the crate name ...
ASSERT_FALSE (Rust::validate_crate_name ("abcdefg-1234", error));
ASSERT_FALSE (Rust::validate_crate_name ("a+b", error));
ASSERT_FALSE (Rust::validate_crate_name ("/a+b/", error));
+ ASSERT_FALSE (Rust::validate_crate_name ("😸++", error));
+ ASSERT_FALSE (Rust::validate_crate_name ("∀", error));
/* Tests for crate name inference */
ASSERT_EQ (Rust::infer_crate_name ("c.rs"), "c");
diff --git a/gcc/rust/util/rust-unicode.cc b/gcc/rust/util/rust-unicode.cc
index 738e1f1..73e1abd 100644
--- a/gcc/rust/util/rust-unicode.cc
+++ b/gcc/rust/util/rust-unicode.cc
@@ -12,6 +12,7 @@ typedef std::vector<codepoint_t> string_t;
template <std::size_t SIZE>
int64_t
binary_search_ranges (
+ // FIXME: use binary search function from <algorithm>
const std::array<std::pair<uint32_t, uint32_t>, SIZE> &ranges,
uint32_t target_cp)
{
@@ -49,6 +50,7 @@ int64_t
binary_search_sorted_array (const std::array<uint32_t, SIZE> &array,
uint32_t target)
{
+ // FIXME: use binary search function from <algorithm>
if (SIZE == 0)
return -1;
@@ -104,9 +106,7 @@ recursive_decomp_cano (codepoint_t c, string_t &buf)
{
string_t decomped = it->second;
for (codepoint_t cp : decomped)
- {
- recursive_decomp_cano (cp, buf);
- }
+ recursive_decomp_cano (cp, buf);
}
else
buf.push_back (c);
@@ -152,8 +152,7 @@ recomp (string_t s)
if (s.size () > 0)
{
int last_class = -1;
- // int starter_pos = 0; // Assume the first character is Starter. Correct?
- // int target_pos = 1;
+ // Assume the first character is Starter.
codepoint_t starter_ch = s[0];
for (unsigned int src_pos = 1; src_pos < s.size (); src_pos++)
{
@@ -189,20 +188,6 @@ recomp (string_t s)
return buf;
}
-// TODO: remove
-/*
-void
-dump_string (std::vector<uint32_t> s)
-{
- std::cout << "dump=";
- for (auto c : s)
- {
- std::cout << std::hex << c << ", ";
- }
- std::cout << std::endl;
-}
-*/
-
string_t
nfc_normalize (string_t s)
{
diff --git a/gcc/rust/util/rust-unicode.h b/gcc/rust/util/rust-unicode.h
index 8c0bd06..6800558 100644
--- a/gcc/rust/util/rust-unicode.h
+++ b/gcc/rust/util/rust-unicode.h
@@ -19,10 +19,29 @@
#ifndef RUST_UNICODE_H
#define RUST_UNICODE_H
+#include "optional.h"
#include "rust-system.h"
+#include "rust-lex.h"
namespace Rust {
+class Utf8String
+{
+private:
+ tl::optional<std::vector<Codepoint>> chars;
+
+public:
+ Utf8String (const std::string &maybe_utf8)
+ {
+ Lexer::BufferInputSource input_source = {maybe_utf8, 0};
+ chars = input_source.get_chars ();
+ }
+
+ // Returns the Unicode codepoints when the string is valid UTF-8; returns
+ // nullopt otherwise.
+ tl::optional<std::vector<Codepoint>> get_chars () const { return chars; }
+};
+
// TODO: add function nfc_normalize
bool
diff --git a/gcc/testsuite/rust/compile/bad-crate-name.rs b/gcc/testsuite/rust/compile/bad-crate-name1.rs
index 6c59c255..6c59c255 100644
--- a/gcc/testsuite/rust/compile/bad-crate-name.rs
+++ b/gcc/testsuite/rust/compile/bad-crate-name1.rs
diff --git a/gcc/testsuite/rust/compile/bad-crate-name2.rs b/gcc/testsuite/rust/compile/bad-crate-name2.rs
new file mode 100644
index 0000000..1d80fa5
--- /dev/null
+++ b/gcc/testsuite/rust/compile/bad-crate-name2.rs
@@ -0,0 +1,2 @@
+#![crate_name = "😅"] // { dg-error "invalid character ...." "" }
+fn main() {}