aboutsummaryrefslogtreecommitdiff
path: root/gcc/rust
diff options
context:
space:
mode:
author Raiki Tamura <tamaron1203@gmail.com> 2023-08-06 19:17:17 +0900
committer Arthur Cohen <arthur.cohen@embecosm.com> 2024-01-16 19:00:30 +0100
commit 5b47923fe512f088a4f1c31466236843c20b7ff9 (patch)
tree 4c3258bbf931ea7093e47d8c7008bcc35d9dbeaa /gcc/rust
parent 1e288d66cb4f0a25a249c0c6dacc2efbf9e44dc8 (diff)
download gcc-5b47923fe512f088a4f1c31466236843c20b7ff9.zip
gcc-5b47923fe512f088a4f1c31466236843c20b7ff9.tar.gz
gcc-5b47923fe512f088a4f1c31466236843c20b7ff9.tar.bz2
gccrs: clean up Codepoint and InputSource
gcc/rust/ChangeLog:

* lex/rust-codepoint.h: Moved to...
* util/rust-codepoint.h: ...here.
* lex/rust-input-source.h: Add missing license
* util/rust-unicode.cc: Add missing license
* util/rust-punycode.cc (extract_basic_string): Remove constant

Signed-off-by: Raiki Tamura <tamaron1203@gmail.com>
Diffstat (limited to 'gcc/rust')
-rw-r--r-- gcc/rust/lex/rust-input-source.h 70
-rw-r--r-- gcc/rust/util/rust-codepoint.h (renamed from gcc/rust/lex/rust-codepoint.h) 0
-rw-r--r-- gcc/rust/util/rust-punycode.cc 4
-rw-r--r-- gcc/rust/util/rust-unicode.cc 18
4 files changed, 66 insertions, 26 deletions
diff --git a/gcc/rust/lex/rust-input-source.h b/gcc/rust/lex/rust-input-source.h
index 07137de..32261a0 100644
--- a/gcc/rust/lex/rust-input-source.h
+++ b/gcc/rust/lex/rust-input-source.h
@@ -1,3 +1,21 @@
+// Copyright (C) 2020-2023 Free Software Foundation, Inc.
+
+// This file is part of GCC.
+
+// GCC is free software; you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 3, or (at your option) any later
+// version.
+
+// GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with GCC; see the file COPYING3. If not see
+// <http://www.gnu.org/licenses/>.
+
#ifndef RUST_INPUT_SOURCE_H
#define RUST_INPUT_SOURCE_H
@@ -5,6 +23,14 @@
#include "optional.h"
namespace Rust {
+
+constexpr uint8_t UTF8_BOM1 = 0xEF;
+constexpr uint8_t UTF8_BOM2 = 0xBB;
+constexpr uint8_t UTF8_BOM3 = 0xBF;
+
+constexpr uint32_t MAX_ASCII_CODEPOINT = 0x7F;
+constexpr uint32_t CODEPOINT_INVALID = 0xFFFE;
+
// Input source wrapper thing.
class InputSource
{
@@ -23,7 +49,7 @@ private:
if ((int32_t) input == EOF)
return Codepoint::eof ();
- else if (input < 128)
+ else if (input <= MAX_ASCII_CODEPOINT)
{
// ascii -- 1 byte
return {input};
@@ -31,14 +57,14 @@ private:
else if ((input & 0xC0) == 0x80)
{
// invalid (continuation; can't be first char)
- return {0xFFFE};
+ return {CODEPOINT_INVALID};
}
else if ((input & 0xE0) == 0xC0)
{
// 2 bytes
uint8_t input2 = next_byte ();
if ((input2 & 0xC0) != 0x80)
- return {0xFFFE};
+ return {CODEPOINT_INVALID};
uint32_t output = ((input & 0x1F) << 6) | ((input2 & 0x3F) << 0);
return output;
@@ -50,23 +76,23 @@ private:
// If the second byte is equal to 0xBB then the input is no longer a
// valid UTF-8 char. Then, we check if the third byte makes up a UTF
// BOM.
- if (input == 0xEF && input2 == 0xBB)
+ if (input == UTF8_BOM1 && input2 == UTF8_BOM2)
{
uint8_t input3 = next_byte ();
- if (input3 == 0xBF)
+ if (input3 == UTF8_BOM3)
// found BOM
return next_codepoint ();
else
- return {0xFFFE};
+ return {CODEPOINT_INVALID};
}
if ((input2 & 0xC0) != 0x80)
- return {0xFFFE};
+ return {CODEPOINT_INVALID};
uint8_t input3 = next_byte ();
if ((input3 & 0xC0) != 0x80)
- return {0xFFFE};
+ return {CODEPOINT_INVALID};
uint32_t output = ((input & 0x0F) << 12) | ((input2 & 0x3F) << 6)
| ((input3 & 0x3F) << 0);
@@ -77,15 +103,15 @@ private:
// 4 bytes
uint8_t input2 = next_byte ();
if ((input2 & 0xC0) != 0x80)
- return {0xFFFE};
+ return {CODEPOINT_INVALID};
uint8_t input3 = next_byte ();
if ((input3 & 0xC0) != 0x80)
- return {0xFFFE};
+ return {CODEPOINT_INVALID};
uint8_t input4 = next_byte ();
if ((input4 & 0xC0) != 0x80)
- return {0xFFFE};
+ return {CODEPOINT_INVALID};
uint32_t output = ((input & 0x07) << 18) | ((input2 & 0x3F) << 12)
| ((input3 & 0x3F) << 6) | ((input4 & 0x3F) << 0);
@@ -93,23 +119,26 @@ private:
}
else
{
- return {0xFFFE};
+ return {CODEPOINT_INVALID};
}
}
protected:
- // Check if the input source is valid as utf-8 and copy all characters to
- // `chars`.
+ // This method must be called by the constructor to initialize the input
+ // source. We cannot move this to the constructor because it calls a
+ // virtual method.
void init ()
{
+ // Check if the input source is valid as utf-8 and copy all characters to
+ // `chars`.
Codepoint char32 = next_codepoint ();
- while (!char32.is_eof () && char32 != 0xFFFE)
+ while (!char32.is_eof () && char32 != CODEPOINT_INVALID)
{
chars.push_back (char32);
char32 = next_codepoint ();
}
- if (char32 == 0xFFFE)
+ if (char32 == CODEPOINT_INVALID)
{
// Input source is not valid as utf-8.
is_valid_utf8 = false;
@@ -158,11 +187,7 @@ private:
public:
// Create new input source from file.
- FileInputSource (FILE *input) : InputSource (), input (input)
- {
- // TODO make this better?
- init ();
- }
+ FileInputSource (FILE *input) : InputSource (), input (input) { init (); }
};
class BufferInputSource : public InputSource
@@ -175,7 +200,7 @@ private:
{
if (offs >= buffer.size ())
return EOF;
- return (uint8_t) buffer.at (offs++);
+ return static_cast<uint8_t> (buffer.at (offs++));
}
public:
@@ -183,7 +208,6 @@ public:
BufferInputSource (const std::string &b, size_t offset)
: InputSource (), buffer (b), offs (offset)
{
- // TODO make this better?
init ();
}
};
diff --git a/gcc/rust/lex/rust-codepoint.h b/gcc/rust/util/rust-codepoint.h
index 755c837..755c837 100644
--- a/gcc/rust/lex/rust-codepoint.h
+++ b/gcc/rust/util/rust-codepoint.h
diff --git a/gcc/rust/util/rust-punycode.cc b/gcc/rust/util/rust-punycode.cc
index a35d54a..6c796ab 100644
--- a/gcc/rust/util/rust-punycode.cc
+++ b/gcc/rust/util/rust-punycode.cc
@@ -36,15 +36,13 @@ constexpr uint32_t INITIAL_BIAS = 72;
constexpr uint32_t INITIAL_N = 128;
constexpr char DELIMITER = '-';
-constexpr uint32_t MAX_ASCII_CODEPOINT = 0x7F;
-
std::string
extract_basic_string (const std::vector<Codepoint> &src)
{
std::string basic_string;
for (auto c : src)
{
- if (c.value <= MAX_ASCII_CODEPOINT)
+ if (c.value <= 0x7F)
basic_string += c.as_string ();
}
return basic_string;
diff --git a/gcc/rust/util/rust-unicode.cc b/gcc/rust/util/rust-unicode.cc
index b2ddaf0..95653cb 100644
--- a/gcc/rust/util/rust-unicode.cc
+++ b/gcc/rust/util/rust-unicode.cc
@@ -1,3 +1,21 @@
+// Copyright (C) 2020-2023 Free Software Foundation, Inc.
+
+// This file is part of GCC.
+
+// GCC is free software; you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 3, or (at your option) any later
+// version.
+
+// GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with GCC; see the file COPYING3. If not see
+// <http://www.gnu.org/licenses/>.
+
#include "rust-system.h"
#include "optional.h"
#include "selftest.h"