aboutsummaryrefslogtreecommitdiff
path: root/gcc/rust/util/rust-unicode.h
blob: 6a6526db30909d6c68ea4ecce054b466658406fa (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
// Copyright (C) 2020-2025 Free Software Foundation, Inc.

// This file is part of GCC.

// GCC is free software; you can redistribute it and/or modify it under
// the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 3, or (at your option) any later
// version.

// GCC is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// for more details.

// You should have received a copy of the GNU General Public License
// along with GCC; see the file COPYING3.  If not see
// <http://www.gnu.org/licenses/>.

#ifndef RUST_UNICODE_H
#define RUST_UNICODE_H

#include "optional.h"
#include "rust-system.h"
#include "rust-input-source.h"

namespace Rust {

class Utf8String
{
private:
  std::vector<Codepoint> chars;

public:
  static tl::optional<Utf8String>
  make_utf8_string (const std::string &maybe_utf8)
  {
    BufferInputSource input_source = {maybe_utf8, 0};
    tl::optional<std::vector<Codepoint>> chars_opt = input_source.get_chars ();
    if (chars_opt.has_value ())
      return {Utf8String (chars_opt.value ())};
    else
      return tl::nullopt;
  }

  Utf8String (const std::vector<Codepoint> codepoints) : chars ({codepoints}) {}

  std::string as_string () const
  {
    std::stringstream ss;
    for (Codepoint c : chars)
      ss << c.as_string ();

    return ss.str ();
  };

  // Returns characters
  std::vector<Codepoint> get_chars () const { return chars; }

  Utf8String nfc_normalize () const;
};

bool
is_alphabetic (uint32_t codepoint);

bool
is_ascii_only (const std::string &str);

bool
is_numeric (uint32_t codepoint);

bool
is_nfc_qc_no (uint32_t codepoint);

bool
is_nfc_qc_maybe (uint32_t codepoint);

enum class QuickCheckResult
{
  YES,
  NO,
  MAYBE
};

QuickCheckResult
nfc_quick_check (const std::vector<Codepoint> &s);

} // namespace Rust

#if CHECKING_P

namespace selftest {

void
rust_nfc_qc_test ();

void
rust_utf8_normalize_test ();

void
rust_utf8_property_test ();

} // namespace selftest

#endif // CHECKING_P

#endif