aboutsummaryrefslogtreecommitdiff
path: root/gcc/rust/util/rust-unicode.cc
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/rust/util/rust-unicode.cc')
-rw-r--r--gcc/rust/util/rust-unicode.cc41
1 files changed, 25 insertions, 16 deletions
diff --git a/gcc/rust/util/rust-unicode.cc b/gcc/rust/util/rust-unicode.cc
index c6aa063..b2ddaf0 100644
--- a/gcc/rust/util/rust-unicode.cc
+++ b/gcc/rust/util/rust-unicode.cc
@@ -1,12 +1,14 @@
#include "rust-system.h"
#include "optional.h"
#include "selftest.h"
+#include "rust-lex.h"
+#include "rust-unicode.h"
#include "rust-unicode-data.h"
namespace Rust {
-typedef uint32_t codepoint_t;
+typedef Codepoint codepoint_t;
typedef std::vector<codepoint_t> string_t;
// These constants are used to compose and decompose of Hangul syllables.
@@ -85,7 +87,7 @@ binary_search_sorted_array (const std::array<uint32_t, SIZE> &array,
int
lookup_cc (codepoint_t c)
{
- auto it = Rust::CCC_TABLE.find (c);
+ auto it = Rust::CCC_TABLE.find (c.value);
if (it != Rust::CCC_TABLE.end ())
return it->second;
else
@@ -96,11 +98,11 @@ lookup_cc (codepoint_t c)
tl::optional<codepoint_t>
lookup_recomp (codepoint_t starter, codepoint_t c)
{
- auto it = Rust::RECOMPOSITION_MAP.find ({starter, c});
+ auto it = Rust::RECOMPOSITION_MAP.find ({starter.value, c.value});
if (it != Rust::RECOMPOSITION_MAP.end ())
return {it->second};
- it = Rust::RECOMPOSITION_MAP.find ({starter, 0});
+ it = Rust::RECOMPOSITION_MAP.find ({starter.value, 0});
if (it != Rust::RECOMPOSITION_MAP.end ())
return {it->second};
@@ -110,11 +112,11 @@ lookup_recomp (codepoint_t starter, codepoint_t c)
void
recursive_decomp_cano (codepoint_t c, string_t &buf)
{
- auto it = Rust::DECOMPOSITION_MAP.find (c);
+ auto it = Rust::DECOMPOSITION_MAP.find (c.value);
if (it != Rust::DECOMPOSITION_MAP.end ())
{
- string_t decomped = it->second;
- for (codepoint_t cp : decomped)
+ std::vector<uint32_t> decomped = it->second;
+ for (uint32_t cp : decomped)
recursive_decomp_cano (cp, buf);
}
else
@@ -127,7 +129,7 @@ decomp_cano (string_t s)
string_t buf;
for (codepoint_t c : s)
{
- int64_t s_index = c - S_BASE;
+ int64_t s_index = c.value - S_BASE;
if (0 <= s_index && s_index < S_COUNT)
{
// decompose Hangul argorithmically
@@ -160,7 +162,7 @@ sort_cano (string_t &s)
if (cc_here > 0 && cc_prev > 0 && cc_prev > cc_here)
{
// swap
- int tmp = s[i];
+ codepoint_t tmp = s[i];
s[i] = s[i - 1];
s[i - 1] = tmp;
if (i > 1)
@@ -183,10 +185,10 @@ compose_hangul (string_t s)
codepoint_t ch = s[src_pos];
// L V => LV
- int64_t l_index = last - L_BASE;
+ int64_t l_index = last.value - L_BASE;
if (0 <= l_index && l_index < L_COUNT)
{
- int64_t v_index = ch - V_BASE;
+ int64_t v_index = ch.value - V_BASE;
if (0 <= v_index && v_index < V_COUNT)
{
last = S_BASE + (l_index * V_COUNT + v_index) * T_COUNT;
@@ -198,13 +200,13 @@ compose_hangul (string_t s)
}
// LV T => LVT
- int64_t s_index = last - S_BASE;
+ int64_t s_index = last.value - S_BASE;
if (0 <= s_index && s_index < S_COUNT && (s_index % T_COUNT) == 0)
{
- int64_t t_index = ch - T_BASE;
+ int64_t t_index = ch.value - T_BASE;
if (0 < t_index && t_index < T_COUNT)
{
- last += t_index;
+ last.value += t_index;
// pop LV
buf.pop_back ();
buf.push_back (last);
@@ -282,6 +284,12 @@ nfc_normalize (string_t s)
return r;
}
+Utf8String
+Utf8String::nfc_normalize () const
+{
+ return Utf8String (Rust::nfc_normalize (chars));
+}
+
bool
is_alphabetic (uint32_t codepoint)
{
@@ -309,9 +317,10 @@ is_numeric (uint32_t codepoint)
namespace selftest {
void
-assert_normalize (std::vector<uint32_t> origin, std::vector<uint32_t> expected)
+assert_normalize (const std::vector<Rust::Codepoint> origin,
+ const std::vector<Rust::Codepoint> expected)
{
- std::vector<uint32_t> actual = Rust::nfc_normalize (origin);
+ std::vector<Rust::Codepoint> actual = Rust::nfc_normalize (origin);
ASSERT_EQ (actual.size (), expected.size ());
for (unsigned int i = 0; i < actual.size (); i++)