about | summary | refs | log | tree | commit | diff
path: root/gcc
diff options
context:
space:
mode:
author SimplyTheOther <simplytheother@gmail.com> 2020-08-20 11:51:25 +0800
committer Philip Herron <philip.herron@embecosm.com> 2020-11-28 21:13:20 +0000
commit b758ec724cc06cb866a72ce17dbfd8a426cf21db (patch)
tree cffb4e7c584f7da75ea42e57011c5a1d160d67a2 /gcc
parent c26f60f6a28394e98ac1d830cbe8f632ef576dbb (diff)
download gcc-b758ec724cc06cb866a72ce17dbfd8a426cf21db.zip
gcc-b758ec724cc06cb866a72ce17dbfd8a426cf21db.tar.gz
gcc-b758ec724cc06cb866a72ce17dbfd8a426cf21db.tar.bz2
Lexer cleanup
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/rust/lex/rust-codepoint.h 23
-rw-r--r-- gcc/rust/lex/rust-lex.cc 748
-rw-r--r-- gcc/rust/lex/rust-lex.h 50
-rw-r--r-- gcc/rust/lex/rust-token.h 1
-rw-r--r-- gcc/rust/rust-linemap.cc 3
-rw-r--r-- gcc/rust/rust-session-manager.cc 6
6 files changed, 426 insertions, 405 deletions
diff --git a/gcc/rust/lex/rust-codepoint.h b/gcc/rust/lex/rust-codepoint.h
index 0f2e5bd..d95bfdf 100644
--- a/gcc/rust/lex/rust-codepoint.h
+++ b/gcc/rust/lex/rust-codepoint.h
@@ -1,11 +1,6 @@
#ifndef RUST_CODEPOINT_H
#define RUST_CODEPOINT_H
-#include "config.h"
-#include "system.h"
-#include "coretypes.h"
-// config, system, coretypes - TODO: ensure all are needed
-
#include <string>
namespace Rust {
@@ -16,11 +11,21 @@ struct Codepoint
// Creates a zero codepoint.
Codepoint () : value (0) {}
- // Creates a codepoint from UTF-8 value.
- Codepoint (uint32_t value_) : value (value_) {}
+ // Creates a codepoint from an encoded UTF-8 value.
+ Codepoint (uint32_t value) : value (value) {}
+
+ // Returns a C++ string containing string value of codepoint.
+ std::string as_string ();
+
+ bool operator== (Codepoint other) const
+ {
+ return value == other.value;
+ }
- // Returns a C++ string containing value of codepoint.
- ::std::string as_string ();
+ bool operator!= (Codepoint other) const
+ {
+ return !operator== (other);
+ }
};
} // namespace Rust
diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc
index 54adf48..6f50e21 100644
--- a/gcc/rust/lex/rust-lex.cc
+++ b/gcc/rust/lex/rust-lex.cc
@@ -1,6 +1,6 @@
#include "rust-lex.h"
-#include "rust-system.h" // for rust_assert and rust_unreachable
+#include "rust-system.h" // for rust_assert and rust_unreachable
#include "rust-diagnostics.h" // for rust_error_at
#include "rust-linemap.h"
#include "safe-ctype.h"
@@ -10,7 +10,7 @@
namespace Rust {
// TODO: move to separate compilation unit?
// overload += for uint32_t to allow 32-bit encoded utf-8 to be added
- ::std::string& operator+=(::std::string& str, Codepoint char32) {
+ std::string& operator+=(std::string& str, Codepoint char32) {
if (char32.value < 0x80) {
str += static_cast<char>(char32.value);
} else if (char32.value < (0x1F + 1) << (1 * 6)) {
@@ -27,64 +27,45 @@ namespace Rust {
str += static_cast<char>(0x80 | ((char32.value >> 0) & 0x3F));
} else {
fprintf(stderr, "Invalid unicode codepoint found: '%u' \n", char32.value);
- // rust_error_at(get_current_location(), "Invalid unicode codepoint found: '%u'",
- // char32.value);
}
return str;
}
- ::std::string Codepoint::as_string() {
+ std::string Codepoint::as_string() {
std::string str;
- // do i need to do this? or can i just do str += value due to op overloading?
-
- // ok can't figure out how to just convert to codepoint or use "this" so create new one
- str += Codepoint(value);
-
- /*if (value < 0x80) {
- str += static_cast<char>(value);
- } else if (value < (0x1F + 1) << (1 * 6)) {
- str += static_cast<char>(0xC0 | ((value >> 6) & 0x1F));
- str += static_cast<char>(0x80 | ((value >> 0) & 0x3F));
- } else if (value < (0x0F + 1) << (2 * 6)) {
- str += static_cast<char>(0xE0 | ((value >> 12) & 0x0F));
- str += static_cast<char>(0x80 | ((value >> 6) & 0x3F));
- str += static_cast<char>(0x80 | ((value >> 0) & 0x3F));
- } else if (value < (0x07 + 1) << (3 * 6)) {
- str += static_cast<char>(0xF0 | ((value >> 18) & 0x07));
- str += static_cast<char>(0x80 | ((value >> 12) & 0x3F));
- str += static_cast<char>(0x80 | ((value >> 6) & 0x3F));
- str += static_cast<char>(0x80 | ((value >> 0) & 0x3F));
- } else {
- rust_error_at(get_current_location(), "Invalid unicode codepoint found: '%u'", value);
- }*/
+ // str += Codepoint (value);
+ str += *this;
+
return str;
}
- // Includes all allowable float digits EXCEPT _ and . as that needs lookahead for handling.
- inline bool is_float_digit(char number) {
+ /* Includes all allowable float digits EXCEPT _ and . as that needs lookahead
+ * for handling. */
+ bool is_float_digit(char number) {
return ISDIGIT(number) || number == 'E' || number == 'e';
}
- // Basically ISXDIGIT from safe-ctype but may change if Rust's encoding or whatever is different
- inline bool is_x_digit(char number) {
+ /* Basically ISXDIGIT from safe-ctype but may change if Rust's encoding or
+ * whatever is different */
+ bool is_x_digit(char number) {
return ISXDIGIT(number);
}
- inline bool is_octal_digit(char number) {
+ bool is_octal_digit(char number) {
return number >= '0' && number <= '7';
}
- inline bool is_bin_digit(char number) {
+ bool is_bin_digit(char number) {
return number == '0' || number == '1';
}
- inline bool check_valid_float_dot_end(char character) {
+ bool check_valid_float_dot_end(char character) {
return character != '.' && character != '_' && !ISALPHA(character);
}
// ISSPACE from safe-ctype but may change in future
- inline bool is_whitespace(char character) {
+ bool is_whitespace(char character) {
return ISSPACE(character);
}
@@ -96,21 +77,22 @@ namespace Rust {
}
Lexer::~Lexer() {
- /* ok apparently stop (which is equivalent of original code in destructor) is meant to be
- * called after all files have finished parsing, for cleanup. On the other hand, actual code
- * that it calls to leave a certain line map is mentioned in GCC docs as being useful for
- * "just leaving an included header" and stuff like that, so this line mapping functionality
- * may need fixing.
+ /* ok apparently stop (which is equivalent of original code in destructor) is
+ * meant to be called after all files have finished parsing, for cleanup. On
+ * the other hand, actual code that it calls to leave a certain line map is
+ * mentioned in GCC docs as being useful for "just leaving an included header"
+ * and stuff like that, so this line mapping functionality may need fixing.
* FIXME: find out whether this occurs. */
// line_map->stop();
}
- // TODO: need to optimise somehow to avoid the virtual function call in the tight loop.
- // Best idea at the moment is CRTP, but that might make lexer implementation annoying when storing
- // the "base class" (i.e. would need template parameter everywhere), although in practice it would
- // mostly just look ugly and make enclosing classes like Parser also require a type parameter.
- // At this point a macro might be better.
- // OK I guess macros can be replaced by constexpr if or something if possible.
+ /* TODO: need to optimise somehow to avoid the virtual function call in the
+ * tight loop. Best idea at the moment is CRTP, but that might make lexer
+ * implementation annoying when storing the "base class" (i.e. would need
+ * template parameter everywhere), although in practice it would mostly just
+ * look ugly and make enclosing classes like Parser also require a type
+ * parameter. At this point a macro might be better. OK I guess macros can be
+ * replaced by constexpr if or something if possible. */
Location Lexer::get_current_location() {
return line_map->get_location(current_column);
}
@@ -151,9 +133,9 @@ namespace Rust {
token_queue.replace_current_value(replacement);
}
- /* shitty anonymous namespace that can only be accessed inside the compilation unit - used for
- * classify_keyword
- * Binary search in sorted array of keywords created with x-macros. */
+ /* shitty anonymous namespace that can only be accessed inside the compilation
+ * unit - used for classify_keyword Binary search in sorted array of keywords
+ * created with x-macros. */
namespace {
const std::string keyword_index[] = {
#define RS_TOKEN(x, y)
@@ -172,19 +154,18 @@ namespace Rust {
};
const int num_keywords = sizeof(keyword_index) / sizeof(*keyword_index);
- }
+ } // namespace
- /* Determines whether the string passed in is a keyword or not. If it is, it returns the keyword
- * name. */
+ /* Determines whether the string passed in is a keyword or not. If it is, it
+ * returns the keyword name. */
TokenId Lexer::classify_keyword(const std::string& str) {
const std::string* last = keyword_index + num_keywords;
const std::string* idx = std::lower_bound(keyword_index, last, str);
- if (idx == last || str != *idx) {
+ if (idx == last || str != *idx)
return IDENTIFIER;
- } else {
+ else
return keyword_keys[idx - keyword_index];
- }
}
TokenPtr Lexer::build_token() {
@@ -195,9 +176,8 @@ namespace Rust {
skip_input();
// return end of file token if end of file
- if (current_char == EOF) {
+ if (current_char == EOF)
return Token::make(END_OF_FILE, loc);
- }
// detect shebang
if (loc == 1 && current_line == 1 && current_char == '#') {
@@ -231,12 +211,13 @@ namespace Rust {
// if not end of file, start tokenising
switch (current_char) {
- // ignore whitespace characters for tokens but continue updating location
+ /* ignore whitespace characters for tokens but continue updating
+ * location */
case '\n': // newline
current_line++;
current_column = 1;
// tell line_table that new line starts
- linemap_line_start(::line_table, current_line, max_column_hint);
+ line_map->start_line(current_line, max_column_hint);
continue;
case ' ': // space
current_column++;
@@ -509,23 +490,18 @@ namespace Rust {
current_column++;
return Token::make(RIGHT_CURLY, loc);
case '@':
- // TODO: i don't know what this does, does it need special handling?
current_column++;
return Token::make(PATTERN_BIND, loc);
case '$':
- // TODO: i don't know what this does, does it need special handling?
current_column++;
return Token::make(DOLLAR_SIGN, loc);
case '~':
- // TODO: i don't know what this does, does it need special handling?
current_column++;
return Token::make(TILDE, loc);
case '\\':
- // TODO: i don't know what this does, does it need special handling?
current_column++;
return Token::make(BACKSLASH, loc);
case '`':
- // TODO: i don't know what this does, does it need special handling?
current_column++;
return Token::make(BACKTICK, loc);
case '|':
@@ -599,27 +575,21 @@ namespace Rust {
// byte and byte string test
if (current_char == 'b') {
if (peek_input() == '\'') {
- // byte - allows any ascii or escapes
- // would also have to take into account escapes: \x hex_digit hex_digit,
- // \n, \r, \t, \\, \0
+ skip_input();
+ current_column++;
+ // make current char the next character
+ current_char = peek_input();
int length = 1;
// char to save
- char byte_char;
-
- skip_input();
- // make current char the next character
- current_char = peek_input();
+ char byte_char = 0;
// detect escapes
if (current_char == '\\') {
- /*skip_input();
-
- // make current_char next character (letter)
- current_char = peek_input();*/
-
- parse_escape(length, byte_char, '\'');
+ auto escape_length_pair = parse_escape('\'');
+ byte_char = escape_length_pair.first;
+ length += escape_length_pair.second;
if (byte_char > 127) {
rust_error_at(
@@ -627,15 +597,12 @@ namespace Rust {
byte_char = 0;
}
- // skip_input();
current_char = peek_input();
- length++;
if (current_char != '\'') {
rust_error_at(get_current_location(), "unclosed byte char");
}
- // TODO: ensure skipping is needed here
skip_input();
current_char = peek_input();
length++; // go to next char
@@ -645,12 +612,12 @@ namespace Rust {
skip_input();
current_char = peek_input();
+ length++;
if (current_char != '\'') {
rust_error_at(get_current_location(), "unclosed byte char");
}
- // TODO: ensure skipping is needed here
skip_input();
current_char = peek_input();
length++; // go to next char
@@ -666,18 +633,26 @@ namespace Rust {
// skip quote character
skip_input();
+ current_column++;
std::string str;
str.reserve(16); // some sensible default
int length = 1;
current_char = peek_input();
- // TODO: handle escapes properly
while (current_char != '"' && current_char != '\n') {
if (current_char == '\\') {
- char output_char = 0;
- parse_escape(length, output_char, '"');
+ auto escape_length_pair = parse_escape('"');
+ char output_char = escape_length_pair.first;
+ //length += escape_length_pair.second;
+
+ // TODO: need to fix length - after escape, the length of the line up to the next non-whitespace char of the string is added to length, which is not what we want - we want length to be replaced by that.
+ // possible option could if "if escape_length_pair.first == 0, then length = escape_length_pair.second else length += escape_length_pair.second."
+ if (output_char == 0)
+ length = escape_length_pair.second - 1;
+ else
+ length += escape_length_pair.second;
if (output_char > 127) {
rust_error_at(get_current_location(),
@@ -685,7 +660,8 @@ namespace Rust {
output_char = 0;
}
- str += output_char;
+ if (output_char != 0)
+ str += output_char;
continue;
}
@@ -702,14 +678,18 @@ namespace Rust {
if (current_char == '\n') {
rust_error_at(get_current_location(), "unended byte string literal");
} else if (current_char == '"') {
+ // TEST: hopefully column inc should make string line up properly
+ current_column++;
+
skip_input();
current_char = peek_input();
} else {
- rust_unreachable();
+ gcc_unreachable();
}
+ str.shrink_to_fit();
+
return Token::make_byte_string(loc, str);
- // TODO: ensure escapes and string continue work properly
} else if (peek_input() == 'r' && (peek_input(1) == '#' || peek_input(1) == '"')) {
// raw byte string literals
std::string str;
@@ -721,6 +701,7 @@ namespace Rust {
// get hash count at beginnning
skip_input();
current_char = peek_input();
+ length++;
while (current_char == '#') {
hash_count++;
length++;
@@ -735,6 +716,7 @@ namespace Rust {
skip_input();
current_char = peek_input();
+ length++;
while (true) {
if (current_char == '"') {
@@ -742,8 +724,8 @@ namespace Rust {
for (int i = 0; i < hash_count; i++) {
if (peek_input(i + 1) != '#') {
- enough_hashes
- = false; // could continue here - improve performance
+ enough_hashes = false; // could continue here -
+ // improve performance
}
}
@@ -765,7 +747,9 @@ namespace Rust {
current_column += length;
- return Token::make_byte_string(loc, str); // TODO: does this work properly
+ str.shrink_to_fit();
+
+ return Token::make_byte_string(loc, str);
}
}
@@ -811,65 +795,79 @@ namespace Rust {
rust_error_at(
get_current_location(), "'%s' is a forbidden raw identifier", str.c_str());
} else {
+ str.shrink_to_fit();
+
return Token::make_identifier(loc, str);
}
- } else if (peek == '"' || (peek == '#' && (ISALPHA(peek1) || peek1 == '_'))) {
- // raw string literals
- std::string str;
- str.reserve(16); // some sensible default
+ } else {
+ int peek_index = 0;
+ while (peek_input(peek_index) == '#')
+ peek_index++;
+ // TODO: optimise by using "peek_index" as the hash count - 1 or something
- int length = 1;
- int hash_count = 0;
+ if (peek_input(peek_index) == '"') {
+ // raw string literals
+ std::string str;
+ str.reserve(16); // some sensible default
- // get hash count at beginnning
- current_char = peek;
- while (current_char == '#') {
- hash_count++;
- length++;
+ int length = 1;
+ int hash_count = 0;
+
+ // get hash count at beginnning
+ current_char = peek;
+ while (current_char == '#') {
+ hash_count++;
+ length++;
+
+ skip_input();
+ current_char = peek_input();
+ }
+
+ if (current_char != '"') {
+ rust_error_at(get_current_location(), "raw string has no opening '\"'");
+ }
+ length++;
skip_input();
- current_char = peek_input();
- }
+ Codepoint current_char32 = test_peek_codepoint_input();
- if (current_char != '"') {
- rust_error_at(get_current_location(), "raw string has no opening '\"'");
- }
+ // TODO: didn't account for current_column++ somewhere - one less than is required
- skip_input();
- Codepoint current_char32 = test_peek_codepoint_input();
+ while (true) {
+ if (current_char32.value == '"') {
+ bool enough_hashes = true;
- while (true) {
- if (current_char32.value == '"') {
- bool enough_hashes = true;
+ for (int i = 0; i < hash_count; i++) {
+ // if (test_peek_codepoint_input(i + 1) != '#') {
+ // TODO: ensure this is a good enough replacement
+ if (peek_input(i + 1) != '#') {
+ enough_hashes = false; // could continue here -
+ // improve performance
+ }
+ }
- for (int i = 0; i < hash_count; i++) {
- // if (test_peek_codepoint_input(i + 1) != '#') {
- // TODO: ensure this is a good enough replacement
- if (peek_input(i + 1) != '#') {
- enough_hashes
- = false; // could continue here - improve performance
+ if (enough_hashes) {
+ // skip enough input and peek enough input
+ skip_input(hash_count); // is this enough?
+ current_char = peek_input();
+ length += hash_count + 1;
+ break;
}
}
- if (enough_hashes) {
- // skip enough input and peek enough input
- skip_input(hash_count); // is this enough?
- current_char = peek_input();
- length += hash_count + 1;
- break;
- }
- }
+ length++;
- length++;
+ str += current_char32;
+ test_skip_codepoint_input();
+ current_char32 = test_peek_codepoint_input();
+ }
- str += current_char32;
- test_skip_codepoint_input();
- current_char32 = test_peek_codepoint_input();
- }
+ current_column += length;
- current_column += length;
+ str.shrink_to_fit();
- return Token::make_string(loc, str); // TODO: does this work properly
+ return Token::make_string(loc, str);
+ }
}
}
@@ -895,16 +893,16 @@ namespace Rust {
current_column += length;
// if just a single underscore, not an identifier
- if (first_is_underscore && length == 1) {
+ if (first_is_underscore && length == 1)
return Token::make(UNDERSCORE, loc);
- }
+
+ str.shrink_to_fit();
TokenId keyword = classify_keyword(str);
- if (keyword == IDENTIFIER) {
+ if (keyword == IDENTIFIER)
return Token::make_identifier(loc, str);
- } else {
+ else
return Token::make(keyword, loc);
- }
}
// identify literals
@@ -958,19 +956,14 @@ namespace Rust {
current_column += length;
// convert hex value to decimal representation
- long hex_num = ::std::strtol(str.c_str(), NULL, 16);
-
- // create output string stream for hex value to be converted to string
- // again
- // TODO: if too slow, use sprintf
- ::std::ostringstream ostr;
- ostr << hex_num;
+ long hex_num = std::strtol(str.c_str(), NULL, 16);
- // reassign string representation to converted value
- str = ostr.str();
+ str = std::to_string(hex_num);
// parse in type suffix if it exists
- parse_in_type_suffix(/*current_char, */ type_hint, length);
+ auto type_suffix_pair = parse_in_type_suffix();
+ type_hint = type_suffix_pair.first;
+ length += type_suffix_pair.second;
if (type_hint == CORETYPE_F32 || type_hint == CORETYPE_F64) {
rust_error_at(get_current_location(),
@@ -985,8 +978,6 @@ namespace Rust {
length++;
- // don't add any characters as C octals are just 0124 or whatever
-
// loop through to add entire octal number to string
while (is_octal_digit(current_char) || current_char == '_') {
if (current_char == '_') {
@@ -1010,19 +1001,15 @@ namespace Rust {
current_column += length;
// convert octal value to decimal representation
- long octal_num = ::std::strtol(str.c_str(), NULL, 8);
-
- // create output string stream for octal value to be converted to
- // string again
- // TODO: if too slow, use sprintf
- ::std::ostringstream ostr;
- ostr << octal_num;
+ long octal_num = std::strtol(str.c_str(), NULL, 8);
- // reassign string representation to converted value
- str = ostr.str();
+ str = std::to_string(octal_num);
// parse in type suffix if it exists
- parse_in_type_suffix(/*current_char, */ type_hint, length);
+ // parse_in_type_suffix (/*current_char, */ type_hint, length);
+ auto type_suffix_pair = parse_in_type_suffix();
+ type_hint = type_suffix_pair.first;
+ length += type_suffix_pair.second;
if (type_hint == CORETYPE_F32 || type_hint == CORETYPE_F64) {
rust_error_at(get_current_location(),
@@ -1037,9 +1024,6 @@ namespace Rust {
length++;
- // don't add any characters as C binary numbers are not really
- // supported
-
// loop through to add entire binary number to string
while (is_bin_digit(current_char) || current_char == '_') {
if (current_char == '_') {
@@ -1063,19 +1047,15 @@ namespace Rust {
current_column += length;
// convert binary value to decimal representation
- long bin_num = ::std::strtol(str.c_str(), NULL, 2);
+ long bin_num = std::strtol(str.c_str(), NULL, 2);
- // create output string stream for binary value to be converted to
- // string again
- // TODO: if too slow, use sprintf
- ::std::ostringstream ostr;
- ostr << bin_num;
-
- // reassign string representation to converted value
- str = ostr.str();
+ str = std::to_string(bin_num);
// parse in type suffix if it exists
- parse_in_type_suffix(/*current_char, */ type_hint, length);
+ // parse_in_type_suffix (/*current_char, */ type_hint, length);
+ auto type_suffix_pair = parse_in_type_suffix();
+ type_hint = type_suffix_pair.first;
+ length += type_suffix_pair.second;
if (type_hint == CORETYPE_F32 || type_hint == CORETYPE_F64) {
rust_error_at(get_current_location(),
@@ -1089,10 +1069,13 @@ namespace Rust {
current_char = peek_input();
// parse initial decimal literal - assuming integer
- // TODO: test if works
- parse_in_decimal(/*current_char, */ str, length);
+ // parse_in_decimal (/*current_char, */ str, length);
+ auto str_length_pair = parse_in_decimal();
+ str += str_length_pair.first;
+ length += str_length_pair.second;
- // detect float literal - TODO: fix: "242." is not recognised as a float literal
+ // detect float literal - TODO: fix: "242." is not recognised as a
+ // float literal
if (current_char == '.' && is_float_digit(peek_input(1))) {
// float with a '.', parse another decimal into it
@@ -1106,16 +1089,19 @@ namespace Rust {
length++;
// parse another decimal number for float
- // TODO: test if works
- parse_in_decimal(/*current_char, */ str, length);
+ auto str_length_pair2 = parse_in_decimal();
+ str += str_length_pair2.first;
+ length += str_length_pair2.second;
// parse in exponent part if it exists
- // test to see if this works:
- parse_in_exponent_part(/*current_char, */ str, length);
+ auto exponent_part = parse_in_exponent_part();
+ str += exponent_part.first;
+ length += exponent_part.second;
// parse in type suffix if it exists
- // TODO: see if works:
- parse_in_type_suffix(/*current_char, */ type_hint, length);
+ auto type_suffix_pair = parse_in_type_suffix();
+ type_hint = type_suffix_pair.first;
+ length += type_suffix_pair.second;
if (type_hint != CORETYPE_F32 && type_hint != CORETYPE_F64
&& type_hint != CORETYPE_UNKNOWN) {
@@ -1123,7 +1109,6 @@ namespace Rust {
"invalid type suffix '%s' for float literal",
get_type_hint_string(type_hint));
}
-
} else if (current_char == '.' && check_valid_float_dot_end(peek_input(1))) {
is_real = true;
@@ -1138,13 +1123,6 @@ namespace Rust {
// don't parse another decimal number for float
- // parse in exponent part if it exists - shouldn't exist?
- // parse_in_exponent_part(/*current_char, */ str, length);
-
- // parse in type suffix if it exists - shouldn't exist?
- // TODO: see if works:
- // parse_in_type_suffix(/*current_char, */ type_hint, length);
-
if (type_hint != CORETYPE_F32 && type_hint != CORETYPE_F64
&& type_hint != CORETYPE_UNKNOWN) {
rust_error_at(get_current_location(),
@@ -1155,10 +1133,16 @@ namespace Rust {
is_real = true;
// parse exponent part
- parse_in_exponent_part(/*current_char, */ str, length);
+ // parse_in_exponent_part (/*current_char, */ str, length);
+ auto exponent_part = parse_in_exponent_part();
+ str += exponent_part.first;
+ length += exponent_part.second;
// parse in type suffix if it exists
- parse_in_type_suffix(/*current_char, */ type_hint, length);
+ // parse_in_type_suffix (/*current_char, */ type_hint, length);
+ auto type_suffix_pair = parse_in_type_suffix();
+ type_hint = type_suffix_pair.first;
+ length += type_suffix_pair.second;
if (type_hint != CORETYPE_F32 && type_hint != CORETYPE_F64
&& type_hint != CORETYPE_UNKNOWN) {
@@ -1170,11 +1154,15 @@ namespace Rust {
// is an integer
// parse in type suffix if it exists
- parse_in_type_suffix(/*current_char, */ type_hint, length);
+ // parse_in_type_suffix (/*current_char, */ type_hint, length);
+ auto type_suffix_pair = parse_in_type_suffix();
+ type_hint = type_suffix_pair.first;
+ length += type_suffix_pair.second;
if (type_hint == CORETYPE_F32 || type_hint == CORETYPE_F64) {
rust_error_at(get_current_location(),
- "invalid type suffix '%s' for integer (decimal) literal",
+ "invalid type suffix '%s' for integer "
+ "(decimal) literal",
get_type_hint_string(type_hint));
}
}
@@ -1182,12 +1170,13 @@ namespace Rust {
current_column += length;
}
+ str.shrink_to_fit();
+
// actually make the tokens
- if (is_real) {
+ if (is_real)
return Token::make_float(loc, str, type_hint);
- } else {
+ else
return Token::make_int(loc, str, type_hint);
- }
}
// string literals - not processed properly
@@ -1200,20 +1189,25 @@ namespace Rust {
int length = 1;
current_char32 = test_peek_codepoint_input();
- // ok initial peek_codepoint seems to work without "too long"
-
while (current_char32.value != '\n' && current_char32.value != '"') {
- // TODO: handle escapes and string continue
if (current_char32.value == '\\') {
// parse escape
- parse_utf8_escape(length, current_char32, '\'');
-
- // TODO: find a way to parse additional characters after the escape?
- // return after parsing escape?
-
- str += current_char32;
+ auto utf8_escape_pair = parse_utf8_escape('\'');
+ current_char32 = utf8_escape_pair.first;
+ //length += utf8_escape_pair.second;
+
+ // TODO: need to fix length - after escape, the length of the line up to the next non-whitespace char of the string is added to length, which is not what we want - we want length to be replaced by that.
+ // possible option could if "if escape_length_pair.first == 0, then length = escape_length_pair.second else length += escape_length_pair.second."
+ if (current_char32 == Codepoint(0))
+ length = utf8_escape_pair.second - 1;
+ else
+ length += utf8_escape_pair.second;
+
+ if (current_char32 != Codepoint(0))
+ str += current_char32;
- // required as parsing utf8 escape only changes current_char or something
+ // required as parsing utf8 escape only changes current_char
+ // or something
current_char32 = test_peek_codepoint_input();
continue;
@@ -1221,7 +1215,6 @@ namespace Rust {
length += test_get_input_codepoint_length();
- // does this work? not technically a char. maybe have to convert to char series
str += current_char32;
test_skip_codepoint_input();
current_char32 = test_peek_codepoint_input();
@@ -1232,21 +1225,20 @@ namespace Rust {
if (current_char32.value == '\n') {
rust_error_at(get_current_location(), "unended string literal");
} else if (current_char32.value == '"') {
+ current_column++;
+
skip_input();
-
current_char = peek_input();
} else {
- rust_unreachable();
+ gcc_unreachable();
}
+ str.shrink_to_fit();
return Token::make_string(loc, str);
- // TODO: account for escapes and string continue
- // also, in rust a string is a series of unicode characters (4 bytes)
}
// char literal attempt
if (current_char == '\'') {
- // rust chars are 4 bytes and have some weird unicode representation thing
Codepoint current_char32;
int length = 1;
@@ -1256,10 +1248,9 @@ namespace Rust {
// parse escaped char literal
if (current_char32.value == '\\') {
// parse escape
- parse_utf8_escape(length, current_char32, '\'');
-
- // TODO - this skip may not be needed?
- // test_skip_codepoint_input();
+ auto utf8_escape_pair = parse_utf8_escape('\'');
+ current_char32 = utf8_escape_pair.first;
+ length += utf8_escape_pair.second;
if (test_peek_codepoint_input().value != '\'') {
rust_error_at(get_current_location(), "unended char literal");
@@ -1271,7 +1262,6 @@ namespace Rust {
current_column += length;
- // TODO: FIX - char is actually 4 bytes in Rust (uint32) due to unicode
return Token::make_char(loc, current_char32);
} else {
// current_char32 = test_peek_codepoint_input();
@@ -1279,7 +1269,6 @@ namespace Rust {
if (test_peek_codepoint_input().value == '\'') {
// parse normal char literal
- // TODO: FIX - char is actually 4 bytes in Rust (uint32) due to unicode
// skip the ' character
skip_input();
@@ -1292,11 +1281,11 @@ namespace Rust {
} else if (ISDIGIT(current_char32.value) || ISALPHA(current_char32.value)
|| current_char32.value == '_') {
// parse lifetime name
- ::std::string str;
- // TODO: does this work properly?
+ std::string str;
str += current_char32;
- // TODO: fix lifetime name thing - actually, why am I even using utf-8 here?
+ /* TODO: fix lifetime name thing - actually, why am I even
+ * using utf-8 here? */
int length = 1;
@@ -1313,6 +1302,7 @@ namespace Rust {
current_column += length;
+ str.shrink_to_fit();
return Token::make_lifetime(loc, str);
} else {
rust_error_at(get_current_location(), "expected ' after character constant");
@@ -1327,11 +1317,12 @@ namespace Rust {
}
// Shitty pass-by-reference way of parsing in type suffix.
- bool Lexer::parse_in_type_suffix(
- /*char& current_char, */ PrimitiveCoreType& type_hint, int& length) {
- ::std::string suffix;
+ std::pair<PrimitiveCoreType, int> Lexer::parse_in_type_suffix() {
+ std::string suffix;
suffix.reserve(5);
+ int additional_length_offset = 0;
+
// get suffix
while (ISALPHA(current_char) || ISDIGIT(current_char) || current_char == '_') {
if (current_char == '_') {
@@ -1339,12 +1330,12 @@ namespace Rust {
skip_input();
current_char = peek_input();
- length++;
+ additional_length_offset++;
continue;
}
- length++;
+ additional_length_offset++;
suffix += current_char;
skip_input();
@@ -1353,52 +1344,52 @@ namespace Rust {
if (suffix.empty()) {
// no type suffix: do nothing but also no error
- return false;
+ return std::make_pair(CORETYPE_UNKNOWN, additional_length_offset);
} else if (suffix == "f32") {
- type_hint = CORETYPE_F32;
+ return std::make_pair(CORETYPE_F32, additional_length_offset);
} else if (suffix == "f64") {
- type_hint = CORETYPE_F64;
+ return std::make_pair(CORETYPE_F64, additional_length_offset);
} else if (suffix == "i8") {
- type_hint = CORETYPE_I8;
+ return std::make_pair(CORETYPE_I8, additional_length_offset);
} else if (suffix == "i16") {
- type_hint = CORETYPE_I16;
+ return std::make_pair(CORETYPE_I16, additional_length_offset);
} else if (suffix == "i32") {
- type_hint = CORETYPE_I32;
+ return std::make_pair(CORETYPE_I32, additional_length_offset);
} else if (suffix == "i64") {
- type_hint = CORETYPE_I64;
+ return std::make_pair(CORETYPE_I64, additional_length_offset);
} else if (suffix == "i128") {
- type_hint = CORETYPE_I128;
+ return std::make_pair(CORETYPE_I128, additional_length_offset);
} else if (suffix == "isize") {
- type_hint = CORETYPE_ISIZE;
+ return std::make_pair(CORETYPE_ISIZE, additional_length_offset);
} else if (suffix == "u8") {
- type_hint = CORETYPE_U8;
+ return std::make_pair(CORETYPE_U8, additional_length_offset);
} else if (suffix == "u16") {
- type_hint = CORETYPE_U16;
+ return std::make_pair(CORETYPE_U16, additional_length_offset);
} else if (suffix == "u32") {
- type_hint = CORETYPE_U32;
+ return std::make_pair(CORETYPE_U32, additional_length_offset);
} else if (suffix == "u64") {
- type_hint = CORETYPE_U64;
+ return std::make_pair(CORETYPE_U64, additional_length_offset);
} else if (suffix == "u128") {
- type_hint = CORETYPE_U128;
+ return std::make_pair(CORETYPE_U128, additional_length_offset);
} else if (suffix == "usize") {
- type_hint = CORETYPE_USIZE;
+ return std::make_pair(CORETYPE_USIZE, additional_length_offset);
} else {
rust_error_at(get_current_location(), "unknown number suffix '%s'", suffix.c_str());
- return false;
+ return std::make_pair(CORETYPE_UNKNOWN, additional_length_offset);
}
-
- return true;
}
- void Lexer::parse_in_exponent_part(/*char& current_char, */ std::string& str, int& length) {
+ std::pair<std::string, int> Lexer::parse_in_exponent_part() {
+ int additional_length_offset = 0;
+ std::string str;
if (current_char == 'E' || current_char == 'e') {
// add exponent to string as strtod works with it
str += current_char;
skip_input();
current_char = peek_input();
- length++;
+ additional_length_offset++;
// special - and + handling
if (current_char == '-') {
@@ -1407,46 +1398,55 @@ namespace Rust {
skip_input();
current_char = peek_input();
- length++;
+ additional_length_offset++;
} else if (current_char == '+') {
// don't add + but still skip input
skip_input();
current_char = peek_input();
- length++;
+ additional_length_offset++;
}
// parse another decimal number for exponent
- parse_in_decimal(/*current_char, */ str, length);
+ auto str_length_pair = parse_in_decimal();
+ str += str_length_pair.first;
+ additional_length_offset += str_length_pair.second;
}
+ return std::make_pair(str, additional_length_offset);
}
- void Lexer::parse_in_decimal(/*char& current_char, */ std::string& str, int& length) {
+ std::pair<std::string, int> Lexer::parse_in_decimal() {
+ int additional_length_offset = 0;
+ std::string str;
while (ISDIGIT(current_char) || current_char == '_') {
if (current_char == '_') {
// don't add _ to number
skip_input();
current_char = peek_input();
- length++;
+ additional_length_offset++;
continue;
}
- length++;
+ additional_length_offset++;
str += current_char;
skip_input();
current_char = peek_input();
}
+ return std::make_pair(str, additional_length_offset);
}
- // Replace all assorted parse_x_escape with this? Avoids the backwards/peek issue.
- bool Lexer::parse_escape(int& length, char& output_char, char opening_char) {
+ /* Parses escapes (and string continues) in "byte" strings and characters. Does not support unicode. */
+ std::pair<char, int> Lexer::parse_escape(char opening_char) {
+ int additional_length_offset = 0;
+ char output_char = 0;
+
// skip to actual letter
skip_input();
current_char = peek_input();
- length++;
+ additional_length_offset++;
switch (current_char) {
case 'x': {
@@ -1456,9 +1456,9 @@ namespace Rust {
// first hex char
skip_input();
current_char = peek_input();
- length++;
+ additional_length_offset++;
- if (!ISXDIGIT(current_char)) {
+ if (!is_x_digit(current_char)) {
rust_error_at(get_current_location(), "invalid character '\\x%c' in \\x sequence",
current_char);
}
@@ -1467,23 +1467,21 @@ namespace Rust {
// second hex char
skip_input();
current_char = peek_input();
- length++;
+ additional_length_offset++;
- if (!ISXDIGIT(current_char)) {
+ if (!is_x_digit(current_char)) {
rust_error_at(get_current_location(), "invalid character '\\x%c' in \\x sequence",
current_char);
}
hexNum[1] = current_char;
- long hexLong = ::std::strtol(hexNum, NULL, 16);
+ long hexLong = std::strtol(hexNum, NULL, 16);
- if (hexLong > 127)
+ if (hexLong > 255 || hexLong < 0)
rust_error_at(get_current_location(),
- "ascii \\x escape '\\x%s' out of range - allows up to '\\x7F'", hexNum);
- // gcc_assert(hexLong < 128); // as ascii
+ "byte \\x escape '\\x%s' out of range - allows up to '\\xFF'", hexNum);
char hexChar = static_cast<char>(hexLong);
- // TODO: fix - does this actually give the right character?
output_char = hexChar;
} break;
case 'n':
@@ -1507,28 +1505,34 @@ namespace Rust {
case '"':
output_char = '"';
break;
- case 'u': {
+ case 'u':
+ rust_error_at(get_current_location(),
+ "cannot have a unicode escape \\u in a byte %s!",
+ opening_char == '\'' ? "character" : "string");
+ return std::make_pair(output_char, additional_length_offset);
+#if 0
+ {
// TODO: shouldn't be used with this - use parse_utf8_escape
skip_input();
current_char = peek_input();
- length++;
+ additional_length_offset++;
bool need_close_brace = false;
- // TODO: rustc lexer doesn't seem to allow not having { but mrustc lexer does? look at
- // spec?
+ // TODO: rustc lexer doesn't seem to allow not having { but mrustc lexer
+ // does? look at spec?
if (current_char == '{') {
need_close_brace = true;
skip_input();
current_char = peek_input();
- length++;
+ additional_length_offset++;
}
// parse unicode escape
// 1-6 hex digits?
- ::std::string num_str;
+ std::string num_str;
num_str.reserve(6);
// test adding number directly
@@ -1541,12 +1545,12 @@ namespace Rust {
skip_input();
current_char = peek_input();
- length++;
+ additional_length_offset++;
continue;
}
- length++;
+ additional_length_offset++;
// add raw hex numbers
num_str += current_char;
@@ -1554,7 +1558,7 @@ namespace Rust {
// test adding number directly
char tmp[2] = { current_char, 0 };
test_val *= 16;
- test_val += ::std::strtol(tmp, NULL, 16);
+ test_val += std::strtol(tmp, NULL, 16);
skip_input();
current_char = peek_input();
@@ -1565,18 +1569,21 @@ namespace Rust {
// actually an error
rust_error_at(
get_current_location(), "expected terminating '}' in unicode escape");
- return false;
+ // return false;
+ return std::make_pair(output_char, additional_length_offset);
}
// ensure 1-6 hex characters
if (num_str.length() > 6 || num_str.length() < 1) {
rust_error_at(get_current_location(),
- "unicode escape should be between 1 and 6 hex characters; it is %lu",
+ "unicode escape should be between 1 and 6 hex "
+ "characters; it is %lu",
num_str.length());
- return false;
+ // return false;
+ return std::make_pair(output_char, additional_length_offset);
}
- long hex_num = ::std::strtol(num_str.c_str(), NULL, 16);
+ long hex_num = std::strtol(num_str.c_str(), NULL, 16);
// as debug, check hex_num = test_val
if (hex_num > 255) {
@@ -1586,12 +1593,14 @@ namespace Rust {
}
// make output_char the value - UTF-8?
- // TODO: actually make this work - output char must be 4 bytes, do I need a string for
- // this?
+ // TODO: actually make this work - output char must be 4 bytes, do I
+ // need a string for this?
output_char = static_cast</*uint32_t*/ char>(hex_num);
- return true;
+ // return true;
+ return std::make_pair(output_char, additional_length_offset);
} break;
+#endif
case '\r':
case '\n':
// string continue
@@ -1600,10 +1609,10 @@ namespace Rust {
current_line++;
current_column = 1;
// tell line_table that new line starts
- linemap_line_start(::line_table, current_line, max_column_hint);
+ line_map->start_line(current_line, max_column_hint);
// reset "length"
- length = 1;
+ additional_length_offset = 1;
// get next char
skip_input();
@@ -1614,45 +1623,56 @@ namespace Rust {
skip_input();
current_char = peek_input();
- length++;
+ additional_length_offset++;
}
- if (current_char == '\\') {
- parse_escape(length, output_char, opening_char);
- return true;
- } else if (current_char == opening_char) {
+ // shouldn't need this
+#if 0
+ if (current_char == opening_char) {
// TODO: does this skip the ' or " character? It shouldn't.
output_char = 0;
- return true;
+ // return true;
+ return std::make_pair(output_char, additional_length_offset);
} else {
- output_char = current_char;
+ // TODO: shouldn't this make output_char null so that it isn't added to string?
+ // or check for escape being zero?
+ output_char = /*current_char*/0;
// TODO: test has right result
/*skip_input();
current_char = peek_input();*/
- return true;
+ // return true;
+ return std::make_pair(output_char, additional_length_offset);
}
+#endif
+ return std::make_pair(0, additional_length_offset);
default:
rust_error_at(get_current_location(), "unknown escape sequence '\\%c'", current_char);
// returns false if no parsing could be done
- return false;
+ // return false;
+ return std::make_pair(output_char, additional_length_offset);
break;
}
- // all non-special cases (unicode, string continue) should skip their used char
+ // all non-special cases (string continue) should skip their used char
skip_input();
current_char = peek_input();
- length++;
+ additional_length_offset++;
// returns true if parsing was successful
- return true;
+ // return true;
+ return std::make_pair(output_char, additional_length_offset);
}
- bool Lexer::parse_utf8_escape(int& length, Codepoint& output_char, char opening_char) {
+ // Parses an escape (or string continue) in a string or character. Supports unicode escapes.
+ std::pair<Codepoint, int> Lexer::parse_utf8_escape(char opening_char) {
+ Codepoint output_char;
+ int additional_length_offset = 0;
+
// skip to actual letter
skip_input();
current_char = peek_input();
- length++;
+ additional_length_offset++;
switch (current_char) {
case 'x': {
@@ -1662,9 +1682,9 @@ namespace Rust {
// first hex char
skip_input();
current_char = peek_input();
- length++;
+ additional_length_offset++;
- if (!ISXDIGIT(current_char)) {
+ if (!is_x_digit(current_char)) {
rust_error_at(get_current_location(), "invalid character '\\x%c' in \\x sequence",
current_char);
}
@@ -1673,15 +1693,15 @@ namespace Rust {
// second hex char
skip_input();
current_char = peek_input();
- length++;
+ additional_length_offset++;
- if (!ISXDIGIT(current_char)) {
+ if (!is_x_digit(current_char)) {
rust_error_at(get_current_location(), "invalid character '\\x%c' in \\x sequence",
current_char);
}
hexNum[1] = current_char;
- long hexLong = ::std::strtol(hexNum, NULL, 16);
+ long hexLong = std::strtol(hexNum, NULL, 16);
if (hexLong > 127)
rust_error_at(get_current_location(),
@@ -1689,7 +1709,6 @@ namespace Rust {
// gcc_assert(hexLong < 128); // as ascii
char hexChar = static_cast<char>(hexLong);
- // TODO: fix - does this actually give the right character?
output_char = hexChar;
} break;
case 'n':
@@ -1716,28 +1735,21 @@ namespace Rust {
case 'u': {
skip_input();
current_char = peek_input();
- length++;
+ additional_length_offset++;
bool need_close_brace = false;
-
- // TODO: rustc lexer doesn't seem to allow not having { but mrustc lexer does? look at
- // spec?
if (current_char == '{') {
need_close_brace = true;
skip_input();
current_char = peek_input();
- length++;
+ additional_length_offset++;
}
- // parse unicode escape
- // 1-6 hex digits?
- ::std::string num_str;
+ // parse unicode escape - 1-6 hex digits
+ std::string num_str;
num_str.reserve(6);
- // test adding number directly
- uint32_t test_val;
-
// loop through to add entire hex number to string
while (is_x_digit(current_char) || current_char == '_') {
if (current_char == '_') {
@@ -1745,21 +1757,16 @@ namespace Rust {
skip_input();
current_char = peek_input();
- length++;
+ additional_length_offset++;
continue;
}
- length++;
+ additional_length_offset++;
// add raw hex numbers
num_str += current_char;
- // test adding number directly
- char tmp[2] = { current_char, 0 };
- test_val *= 16;
- test_val += ::std::strtol(tmp, NULL, 16);
-
skip_input();
current_char = peek_input();
}
@@ -1769,36 +1776,38 @@ namespace Rust {
if (current_char == '}') {
skip_input();
current_char = peek_input();
- length++;
+ additional_length_offset++;
} else {
// actually an error
rust_error_at(
get_current_location(), "expected terminating '}' in unicode escape");
- return false;
+ // return false;
+ return std::make_pair(output_char, additional_length_offset);
}
}
// ensure 1-6 hex characters
if (num_str.length() > 6 || num_str.length() < 1) {
rust_error_at(get_current_location(),
- "unicode escape should be between 1 and 6 hex characters; it is %lu",
+ "unicode escape should be between 1 and 6 hex "
+ "characters; it is %lu",
num_str.length());
- return false;
+ // return false;
+ return std::make_pair(output_char, additional_length_offset);
}
- long hex_num = ::std::strtol(num_str.c_str(), NULL, 16);
+ long hex_num = std::strtol(num_str.c_str(), NULL, 16);
// assert fits a uint32_t
- rust_assert(hex_num < 4294967296);
+ gcc_assert(hex_num < 4294967296);
- // ok can't figure out how to just convert to codepoint or use "this" so create new
- // one
output_char = Codepoint(static_cast<uint32_t>(hex_num));
- // TODO: what is being outputted? the escape code for the unicode char (unicode
- // number) or the character number?
+ // TODO: what is being outputted? the escape code for the unicode char
+ // (unicode number) or the character number?
- return true;
+ // return true;
+ return std::make_pair(output_char, additional_length_offset);
} break;
case '\r':
case '\n':
@@ -1808,10 +1817,10 @@ namespace Rust {
current_line++;
current_column = 1;
// tell line_table that new line starts
- linemap_line_start(::line_table, current_line, max_column_hint);
+ line_map->start_line(current_line, max_column_hint);
// reset "length"
- length = 1;
+ additional_length_offset = 1;
// get next char
skip_input();
@@ -1822,38 +1831,39 @@ namespace Rust {
skip_input();
current_char = peek_input();
- length++;
+ additional_length_offset++;
}
- if (current_char == '\\') {
- parse_utf8_escape(length, output_char, opening_char);
- return true;
- } else if (current_char == opening_char) {
- // TODO: does this skip the ' or " character? It shouldn't.
+ // shouldn't need this
+#if 0
+ if (current_char == opening_char) {
output_char = 0;
- return true;
+ // return true;
+ return std::make_pair(output_char, additional_length_offset);
} else {
- output_char = current_char;
+ output_char = /*current_char*/0;
- // TODO: test has right result
- /*skip_input();
- current_char = peek_input();*/
-
- return true;
+ // return true;
+ return std::make_pair(output_char, additional_length_offset);
}
+#endif
+ return std::make_pair(0, additional_length_offset);
default:
rust_error_at(get_current_location(), "unknown escape sequence '\\%c'", current_char);
// returns false if no parsing could be done
- return false;
+ // return false;
+ return std::make_pair(output_char, additional_length_offset);
break;
}
- // all non-special cases (unicode, string continue) should skip their used char
+ /* all non-special cases (unicode, string continue) should skip their used
+ * char */
skip_input();
current_char = peek_input();
- length++;
+ additional_length_offset++;
// returns true if parsing was successful
- return true;
+ // return true;
+ return std::make_pair(output_char, additional_length_offset);
}
#if 0
@@ -2102,6 +2112,7 @@ namespace Rust {
}
#endif
+ // Returns the length of the codepoint at the current position.
int Lexer::test_get_input_codepoint_length() {
uint8_t input = peek_input();
@@ -2138,8 +2149,8 @@ namespace Rust {
// return 0xFFFE;
/*uint32_t output
- = ((input & 0x0F) << 12) | ((input2 & 0x3F) << 6) | ((input3 & 0x3F) << 0);
- return output;*/
+ = ((input & 0x0F) << 12) | ((input2 & 0x3F) << 6) | ((input3 & 0x3F) <<
+ 0); return output;*/
return 3;
} else if ((input & 0xF8) == 0xF0) {
// 4 bytes
@@ -2168,7 +2179,7 @@ namespace Rust {
}
}
- // TODO: rewrite lexing system to use utf-8 "codepoints" rather than bytes?
+ // Returns the codepoint at the current position.
Codepoint Lexer::test_peek_codepoint_input() {
uint8_t input = peek_input();
@@ -2224,7 +2235,7 @@ namespace Rust {
void Lexer::test_skip_codepoint_input() {
int toSkip = test_get_input_codepoint_length();
- rust_assert(toSkip >= 1);
+ gcc_assert(toSkip >= 1);
skip_input(toSkip - 1);
}
@@ -2263,8 +2274,8 @@ namespace Rust {
return 0;
/*uint32_t output
- = ((input & 0x0F) << 12) | ((input2 & 0x3F) << 6) | ((input3 & 0x3F) << 0);
- return output;*/
+ = ((input & 0x0F) << 12) | ((input2 & 0x3F) << 6) | ((input3 & 0x3F) <<
+ 0); return output;*/
return 3;
} else if ((input & 0xF8) == 0xF0) {
// 4 bytes
@@ -2293,7 +2304,8 @@ namespace Rust {
}
}
- // peeks the codepoint input at n codepoints ahead of current codepoint - try not to use
+ // peeks the codepoint input at n codepoints ahead of current codepoint - try
+ // not to use
Codepoint Lexer::test_peek_codepoint_input(int n) {
int totalOffset = 0;
@@ -2301,12 +2313,13 @@ namespace Rust {
for (int i = 0; i < n; i++) {
totalOffset += test_get_input_codepoint_n_length(totalOffset);
}
- // issues: this would have (at least) O(n) lookup time, not O(1) like the rest?
+ // issues: this would have (at least) O(n) lookup time, not O(1) like the
+ // rest?
// TODO: implement if still needed
// error out of function as it is not implemented
- rust_assert(1 == 0);
+ gcc_assert(1 == 0);
return { 0 };
/*
uint8_t input = peek_input();
@@ -2336,9 +2349,8 @@ namespace Rust {
return 0xFFFE;
uint32_t output
- = ((input & 0x0F) << 12) | ((input2 & 0x3F) << 6) | ((input3 & 0x3F) << 0);
- return output;
- } else if ((input & 0xF8) == 0xF0) {
+ = ((input & 0x0F) << 12) | ((input2 & 0x3F) << 6) | ((input3 &
+ 0x3F) << 0); return output; } else if ((input & 0xF8) == 0xF0) {
// 4 bytes
uint8_t input2 = peek_input(1);
if ((input2 & 0xC0) != 0x80)
@@ -2353,11 +2365,9 @@ namespace Rust {
return 0xFFFE;
uint32_t output = ((input & 0x07) << 18) | ((input2 & 0x3F) << 12)
- | ((input3 & 0x3F) << 6) | ((input4 & 0x3F) << 0);
- return output;
- } else {
- rust_error_at(get_current_location(), "invalid UTF-8 (too long)");
- return 0xFFFE;
+ | ((input3 & 0x3F) << 6) | ((input4 & 0x3F) <<
+ 0); return output; } else { rust_error_at(get_current_location(), "invalid
+ UTF-8 (too long)"); return 0xFFFE;
}*/
}
-}
+} // namespace Rust
diff --git a/gcc/rust/lex/rust-lex.h b/gcc/rust/lex/rust-lex.h
index 8dc3e31..5220753 100644
--- a/gcc/rust/lex/rust-lex.h
+++ b/gcc/rust/lex/rust-lex.h
@@ -5,6 +5,8 @@
#include "rust-buffered-queue.h"
#include "rust-token.h"
+#include <utility>
+
namespace Rust {
class Lexer
{
@@ -31,35 +33,39 @@ private:
// ok maybe all these may mean the lexer structure needs to be rethought
/* separated into functions because main method was too long, but they rely on
* and change state in the lexer, so variables must be passed by reference. */
- inline void parse_in_decimal (/*char& current_char, */ std::string &str,
- int &length);
- inline void parse_in_exponent_part (/*char& current_char, */ std::string &str,
- int &length);
- inline bool parse_in_type_suffix (
- /*char& current_char, */ PrimitiveCoreType &type_hint, int &length);
- inline bool parse_ascii_escape (/*char& current_char, */ int &length,
- char &output_char);
- inline bool parse_quote_escape (/*char& current_char, */ int &length,
- char &output_char);
- inline bool parse_unicode_escape (
- /*char& current_char, */ int &length, Codepoint &output_char);
- inline bool parse_byte_escape (/*char& current_char, */ int &length,
- char &output_char);
- inline bool parse_escape (int &length, char &output_char, char opening_char);
- inline bool parse_utf8_escape (int &length, Codepoint &output_char,
- char opening_char);
- inline int test_get_input_codepoint_length ();
- inline int test_get_input_codepoint_n_length (int n_start_offset);
- inline Codepoint test_peek_codepoint_input ();
- inline Codepoint test_peek_codepoint_input (
+ std::pair<std::string, int> parse_in_decimal ();
+ std::pair<std::string, int> parse_in_exponent_part ();
+ std::pair<PrimitiveCoreType, int> parse_in_type_suffix ();
+ /*bool parse_ascii_escape (int &length,
+ char &output_char);*/
+ /*bool parse_quote_escape (char& current_char, int &length,
+ char &output_char);*/
+ /*bool parse_unicode_escape (
+ char& current_char, int &length, Codepoint &output_char);*/
+ /*bool parse_byte_escape (char& current_char, int &length,
+ char &output_char);*/
+ std::pair<char, int> parse_escape (char opening_char);
+ std::pair<Codepoint, int> parse_utf8_escape (char opening_char);
+ int test_get_input_codepoint_length ();
+ int test_get_input_codepoint_n_length (int n_start_offset);
+ Codepoint test_peek_codepoint_input ();
+ Codepoint test_peek_codepoint_input (
int n); // maybe can use get_input_codepoint_length to get starting index
- inline void test_skip_codepoint_input ();
+ void test_skip_codepoint_input ();
public:
// Construct lexer with input file and filename provided
Lexer (const char *filename, FILE *input, Linemap *linemap);
~Lexer ();
+ // don't allow copy semantics (for now, at least)
+ Lexer (const Lexer &other) = delete;
+ Lexer &operator= (const Lexer &other) = delete;
+
+ // enable move semantics
+ Lexer (Lexer &&other) = default;
+ Lexer &operator= (Lexer &&other) = default;
+
// Returns token n tokens ahead of current position.
const_TokenPtr peek_token (int n);
// Peeks the current token.
diff --git a/gcc/rust/lex/rust-token.h b/gcc/rust/lex/rust-token.h
index 2270fa2..9dd5f0b 100644
--- a/gcc/rust/lex/rust-token.h
+++ b/gcc/rust/lex/rust-token.h
@@ -53,7 +53,6 @@ enum PrimitiveCoreType
// note that abstract, async, become, box, do, final, macro, override, priv,
// try, typeof, unsized, virtual, and yield are unused
-// TODO finish converting to rust keywords
#define RS_TOKEN_LIST \
RS_TOKEN (FIRST_TOKEN, "<first-token-marker>") \
RS_TOKEN (END_OF_FILE, "end of file") \
diff --git a/gcc/rust/rust-linemap.cc b/gcc/rust/rust-linemap.cc
index fef4603..5ee76bd 100644
--- a/gcc/rust/rust-linemap.cc
+++ b/gcc/rust/rust-linemap.cc
@@ -72,7 +72,8 @@ Gcc_linemap::to_string (Location location)
// Strip the source file down to the base file, to reduce clutter.
std::stringstream ss;
- ss << lbasename (path) << ":" << SOURCE_LINE (lmo, location.gcc_location ());
+ ss << lbasename (path) << ":" << SOURCE_LINE (lmo, location.gcc_location ())
+ << ":" << SOURCE_COLUMN (lmo, location.gcc_location ());
return ss.str ();
}
diff --git a/gcc/rust/rust-session-manager.cc b/gcc/rust/rust-session-manager.cc
index 4308ae3..29933d5 100644
--- a/gcc/rust/rust-session-manager.cc
+++ b/gcc/rust/rust-session-manager.cc
@@ -340,7 +340,7 @@ Session::enable_dump (::std::string arg)
{
error_at (
UNKNOWN_LOCATION,
- "dumping all is not supported as of now. choose 'lex' or 'parse'");
+ "dumping all is not supported as of now. choose 'lex', 'parse', or 'target_options");
return false;
}
else if (arg == "lex")
@@ -379,13 +379,13 @@ Session::enable_dump (::std::string arg)
else if (arg == "")
{
error_at (UNKNOWN_LOCATION,
- "dump option was not given a name. choose 'lex' or 'parse'");
+ "dump option was not given a name. choose 'lex', 'parse', or 'target_options'");
return false;
}
else
{
error_at (UNKNOWN_LOCATION,
- "dump option '%s' was unrecognised. choose 'lex' or 'parse'",
+ "dump option '%s' was unrecognised. choose 'lex', 'parse', or 'target_options",
arg.c_str ());
return false;
}