aboutsummaryrefslogtreecommitdiff
path: root/gcc/rust/util/rust-unicode.h
diff options
context:
space:
mode:
author: Raiki Tamura <tamaron1203@gmail.com> 2023-07-30 19:54:36 +0900
committer: P-E-P <32375388+P-E-P@users.noreply.github.com> 2023-08-09 09:23:03 +0000
commit: a4b7e7375facb701585b2989ef3490528ec5bc9f (patch)
tree: 188247f431c075ef10ec6e67277375849c3039a8 /gcc/rust/util/rust-unicode.h
parent: 74b83511e4ea68cc491d1f174a763bd87b449796 (diff)
download: gcc-a4b7e7375facb701585b2989ef3490528ec5bc9f.zip
gcc-a4b7e7375facb701585b2989ef3490528ec5bc9f.tar.gz
gcc-a4b7e7375facb701585b2989ef3490528ec5bc9f.tar.bz2
gccrs: Normalize all identifier tokens
gcc/rust/ChangeLog:

	* lex/rust-lex.cc (assert_source_content): Fix namespace specifier
	(test_buffer_input_source): Likewise.
	(test_file_input_source): Likewise.
	* lex/rust-lex.h: Move InputSource ...
	* lex/rust-input-source.h: ... to here. (New file)
	* lex/rust-token.cc (nfc_normalize_token_string): New function
	* lex/rust-token.h (nfc_normalize_token_string): New function
	* rust-lang.cc (run_rust_tests): Modify order of selftests.
	* rust-session-manager.cc (validate_crate_name): Modify interface of
	Utf8String.
	* util/rust-unicode.cc (lookup_cc): Modify codepoint_t typedef.
	(lookup_recomp): Likewise.
	(recursive_decomp_cano): Likewise.
	(decomp_cano): Likewise.
	(sort_cano): Likewise.
	(compose_hangul): Likewise.
	(assert_normalize): Likewise.
	(Utf8String::nfc_normalize): New function.
	* util/rust-unicode.h: Modify interface of Utf8String.

gcc/testsuite/ChangeLog:

	* rust/compile/unicode_norm1.rs: New test.

Signed-off-by: Raiki Tamura <tamaron1203@gmail.com>
Diffstat (limited to 'gcc/rust/util/rust-unicode.h')
-rw-r--r-- gcc/rust/util/rust-unicode.h 35
1 files changed, 25 insertions, 10 deletions
diff --git a/gcc/rust/util/rust-unicode.h b/gcc/rust/util/rust-unicode.h
index 6800558..becf6fb 100644
--- a/gcc/rust/util/rust-unicode.h
+++ b/gcc/rust/util/rust-unicode.h
@@ -21,28 +21,43 @@
#include "optional.h"
#include "rust-system.h"
-#include "rust-lex.h"
+#include "rust-input-source.h"
namespace Rust {
class Utf8String
{
private:
- tl::optional<std::vector<Codepoint>> chars;
+ std::vector<Codepoint> chars;
public:
- Utf8String (const std::string &maybe_utf8)
+ static tl::optional<Utf8String>
+ make_utf8_string (const std::string &maybe_utf8)
{
- Lexer::BufferInputSource input_source = {maybe_utf8, 0};
- chars = input_source.get_chars ();
+ BufferInputSource input_source = {maybe_utf8, 0};
+ tl::optional<std::vector<Codepoint>> chars_opt = input_source.get_chars ();
+ if (chars_opt.has_value ())
+ return {Utf8String (chars_opt.value ())};
+ else
+ return tl::nullopt;
}
- // Returns UTF codepoints when string is valid as UTF-8, returns nullopt
- // otherwise.
- tl::optional<std::vector<Codepoint>> get_chars () const { return chars; }
-};
+ Utf8String (const std::vector<Codepoint> codepoints) : chars ({codepoints}) {}
+
+ std::string as_string () const
+ {
+ std::stringstream ss;
+ for (Codepoint c : chars)
+ ss << c.as_string ();
-// TODO: add function nfc_normalize
+ return ss.str ();
+ };
+
+ // Returns characters
+ std::vector<Codepoint> get_chars () const { return chars; }
+
+ Utf8String nfc_normalize () const;
+};
bool
is_alphabetic (uint32_t codepoint);