aboutsummaryrefslogtreecommitdiff
path: root/gcc/rust
diff options
context:
space:
mode:
author: SimplyTheOther <simplytheother@gmail.com> 2020-05-01 08:25:27 +0800
committer: Philip Herron <philip.herron@embecosm.com> 2020-11-28 19:09:39 +0000
commit: c7c6f785c8e893ec7bcacd1a2319ce309d2450f2 (patch)
tree: 390cd5960fb125655ab707cb12567d2a9a0ffeb5 /gcc/rust
parent: 7266d66d2321562ef74daf90ffb1ad70d081d890 (diff)
downloadgcc-c7c6f785c8e893ec7bcacd1a2319ce309d2450f2.zip
gcc-c7c6f785c8e893ec7bcacd1a2319ce309d2450f2.tar.gz
gcc-c7c6f785c8e893ec7bcacd1a2319ce309d2450f2.tar.bz2
Adding Rust target hook documentation
Added powerpc target hook and improved aarch64 feature handling. Added DEC Alpha target hook. Added ARC target hook. Created ARM target hook (at least preliminary support).
Diffstat (limited to 'gcc/rust')
-rw-r--r-- gcc/rust/Make-lang.in 11
-rw-r--r-- gcc/rust/lex/rust-lex.cc 4526
-rw-r--r-- gcc/rust/rust-session-manager.cc 1462
-rw-r--r-- gcc/rust/rust-session-manager.h 378
-rw-r--r-- gcc/rust/rust-target.def 2
5 files changed, 2945 insertions, 3434 deletions
diff --git a/gcc/rust/Make-lang.in b/gcc/rust/Make-lang.in
index 6512f61..1a5622e 100644
--- a/gcc/rust/Make-lang.in
+++ b/gcc/rust/Make-lang.in
@@ -72,12 +72,15 @@ GRS_OBJS = \
$(END)
# removed object files from here
-rust_OBJS = $(GRS_OBJS) rust/rustspec.o
+# All language-specific object files for Rust.
+RUST_ALL_OBJS = $(GRS_OBJS) $(RUST_TARGET_OBJS)
+
+rust_OBJS = $(RUST_ALL_OBJS) rust/rustspec.o
# The compiler itself is called rust1 (formerly grs1)
-rust1$(exeext): $(GRS_OBJS) attribs.o $(BACKEND) $(LIBDEPS)
+rust1$(exeext): $(RUST_ALL_OBJS) attribs.o $(BACKEND) $(LIBDEPS)
+$(LLINKER) $(ALL_LINKERFLAGS) $(LDFLAGS) -o $@ \
- $(GRS_OBJS) attribs.o $(BACKEND) $(LIBS) $(BACKENDLIBS)
+ $(RUST_ALL_OBJS) attribs.o $(BACKEND) $(LIBS) $(BACKENDLIBS)
# Build hooks.
@@ -169,7 +172,7 @@ rust.install-plugin:
rust.uninstall:
# -rm -rf $(DESTDIR)/$(bindir)/$(GCCRS_INSTALL_NAME)$(exeext)
-rm -f gccrs$(exeext) grs1$(exeext)
- -rm -f $(GRS_OBJS)
+ -rm -f $(RUST_ALL_OBJS)
# ^those two are a maybe
# No rust-specific selftests
diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc
index dc51b01..54adf48 100644
--- a/gcc/rust/lex/rust-lex.cc
+++ b/gcc/rust/lex/rust-lex.cc
@@ -1,5 +1,6 @@
#include "rust-lex.h"
+#include "rust-system.h" // for rust_assert and rust_unreachable
#include "rust-diagnostics.h" // for rust_error_at
#include "rust-linemap.h"
#include "safe-ctype.h"
@@ -7,2247 +8,1853 @@
#include <sstream> // for ostringstream
namespace Rust {
-// TODO: move to separate compilation unit?
-// overload += for uint32_t to allow 32-bit encoded utf-8 to be added
-::std::string &
-operator+= (::std::string &str, Codepoint char32)
-{
- if (char32.value < 0x80)
- {
- str += static_cast<char> (char32.value);
- }
- else if (char32.value < (0x1F + 1) << (1 * 6))
- {
- str += static_cast<char> (0xC0 | ((char32.value >> 6) & 0x1F));
- str += static_cast<char> (0x80 | ((char32.value >> 0) & 0x3F));
- }
- else if (char32.value < (0x0F + 1) << (2 * 6))
- {
- str += static_cast<char> (0xE0 | ((char32.value >> 12) & 0x0F));
- str += static_cast<char> (0x80 | ((char32.value >> 6) & 0x3F));
- str += static_cast<char> (0x80 | ((char32.value >> 0) & 0x3F));
- }
- else if (char32.value < (0x07 + 1) << (3 * 6))
- {
- str += static_cast<char> (0xF0 | ((char32.value >> 18) & 0x07));
- str += static_cast<char> (0x80 | ((char32.value >> 12) & 0x3F));
- str += static_cast<char> (0x80 | ((char32.value >> 6) & 0x3F));
- str += static_cast<char> (0x80 | ((char32.value >> 0) & 0x3F));
- }
- else
- {
- fprintf (stderr, "Invalid unicode codepoint found: '%u' \n",
- char32.value);
- // rust_error_at(get_current_location(), "Invalid unicode codepoint found:
- // '%u'", char32.value);
+ // TODO: move to separate compilation unit?
+ // overload += for uint32_t to allow 32-bit encoded utf-8 to be added
+ ::std::string& operator+=(::std::string& str, Codepoint char32) {
+ if (char32.value < 0x80) {
+ str += static_cast<char>(char32.value);
+ } else if (char32.value < (0x1F + 1) << (1 * 6)) {
+ str += static_cast<char>(0xC0 | ((char32.value >> 6) & 0x1F));
+ str += static_cast<char>(0x80 | ((char32.value >> 0) & 0x3F));
+ } else if (char32.value < (0x0F + 1) << (2 * 6)) {
+ str += static_cast<char>(0xE0 | ((char32.value >> 12) & 0x0F));
+ str += static_cast<char>(0x80 | ((char32.value >> 6) & 0x3F));
+ str += static_cast<char>(0x80 | ((char32.value >> 0) & 0x3F));
+ } else if (char32.value < (0x07 + 1) << (3 * 6)) {
+ str += static_cast<char>(0xF0 | ((char32.value >> 18) & 0x07));
+ str += static_cast<char>(0x80 | ((char32.value >> 12) & 0x3F));
+ str += static_cast<char>(0x80 | ((char32.value >> 6) & 0x3F));
+ str += static_cast<char>(0x80 | ((char32.value >> 0) & 0x3F));
+ } else {
+ fprintf(stderr, "Invalid unicode codepoint found: '%u' \n", char32.value);
+ // rust_error_at(get_current_location(), "Invalid unicode codepoint found: '%u'",
+ // char32.value);
+ }
+ return str;
}
- return str;
-}
-::std::string
-Codepoint::as_string ()
-{
- std::string str;
-
- // do i need to do this? or can i just do str += value due to op overloading?
-
- // ok can't figure out how to just convert to codepoint or use "this" so
- // create new one
- str += Codepoint (value);
-
- /*if (value < 0x80) {
- str += static_cast<char>(value);
- } else if (value < (0x1F + 1) << (1 * 6)) {
- str += static_cast<char>(0xC0 | ((value >> 6) & 0x1F));
- str += static_cast<char>(0x80 | ((value >> 0) & 0x3F));
- } else if (value < (0x0F + 1) << (2 * 6)) {
- str += static_cast<char>(0xE0 | ((value >> 12) & 0x0F));
- str += static_cast<char>(0x80 | ((value >> 6) & 0x3F));
- str += static_cast<char>(0x80 | ((value >> 0) & 0x3F));
- } else if (value < (0x07 + 1) << (3 * 6)) {
- str += static_cast<char>(0xF0 | ((value >> 18) & 0x07));
- str += static_cast<char>(0x80 | ((value >> 12) & 0x3F));
- str += static_cast<char>(0x80 | ((value >> 6) & 0x3F));
- str += static_cast<char>(0x80 | ((value >> 0) & 0x3F));
- } else {
- rust_error_at(get_current_location(), "Invalid unicode codepoint found:
- '%u'", value);
- }*/
- return str;
-}
+ ::std::string Codepoint::as_string() {
+ std::string str;
+
+ // do i need to do this? or can i just do str += value due to op overloading?
+
+ // ok can't figure out how to just convert to codepoint or use "this" so create new one
+ str += Codepoint(value);
+
+ /*if (value < 0x80) {
+ str += static_cast<char>(value);
+ } else if (value < (0x1F + 1) << (1 * 6)) {
+ str += static_cast<char>(0xC0 | ((value >> 6) & 0x1F));
+ str += static_cast<char>(0x80 | ((value >> 0) & 0x3F));
+ } else if (value < (0x0F + 1) << (2 * 6)) {
+ str += static_cast<char>(0xE0 | ((value >> 12) & 0x0F));
+ str += static_cast<char>(0x80 | ((value >> 6) & 0x3F));
+ str += static_cast<char>(0x80 | ((value >> 0) & 0x3F));
+ } else if (value < (0x07 + 1) << (3 * 6)) {
+ str += static_cast<char>(0xF0 | ((value >> 18) & 0x07));
+ str += static_cast<char>(0x80 | ((value >> 12) & 0x3F));
+ str += static_cast<char>(0x80 | ((value >> 6) & 0x3F));
+ str += static_cast<char>(0x80 | ((value >> 0) & 0x3F));
+ } else {
+ rust_error_at(get_current_location(), "Invalid unicode codepoint found: '%u'", value);
+ }*/
+ return str;
+ }
-// Includes all allowable float digits EXCEPT _ and . as that needs lookahead
-// for handling.
-inline bool
-is_float_digit (char number)
-{
- return ISDIGIT (number) || number == 'E' || number == 'e';
-}
+ // Includes all allowable float digits EXCEPT _ and . as that needs lookahead for handling.
+ inline bool is_float_digit(char number) {
+ return ISDIGIT(number) || number == 'E' || number == 'e';
+ }
-// Basically ISXDIGIT from safe-ctype but may change if Rust's encoding or
-// whatever is different
-inline bool
-is_x_digit (char number)
-{
- return ISXDIGIT (number);
-}
+ // Basically ISXDIGIT from safe-ctype but may change if Rust's encoding or whatever is different
+ inline bool is_x_digit(char number) {
+ return ISXDIGIT(number);
+ }
-inline bool
-is_octal_digit (char number)
-{
- return number >= '0' && number <= '7';
-}
+ inline bool is_octal_digit(char number) {
+ return number >= '0' && number <= '7';
+ }
-inline bool
-is_bin_digit (char number)
-{
- return number == '0' || number == '1';
-}
+ inline bool is_bin_digit(char number) {
+ return number == '0' || number == '1';
+ }
-inline bool
-check_valid_float_dot_end (char character)
-{
- return character != '.' && character != '_' && !ISALPHA (character);
-}
+ inline bool check_valid_float_dot_end(char character) {
+ return character != '.' && character != '_' && !ISALPHA(character);
+ }
-// ISSPACE from safe-ctype but may change in future
-inline bool
-is_whitespace (char character)
-{
- return ISSPACE (character);
-}
+ // ISSPACE from safe-ctype but may change in future
+ inline bool is_whitespace(char character) {
+ return ISSPACE(character);
+ }
-Lexer::Lexer (const char *filename, FILE *input, Linemap *linemap)
- : input (input), current_line (1), current_column (1), line_map (linemap),
- input_source (input), input_queue (input_source), token_source (this),
- token_queue (token_source)
-{
- // inform line_table that file is being entered and is in line 1
- line_map->start_file (filename, current_line);
-}
+ Lexer::Lexer(const char* filename, FILE* input, Linemap* linemap) :
+ input(input), current_line(1), current_column(1), line_map(linemap), input_source(input),
+ input_queue(input_source), token_source(this), token_queue(token_source) {
+ // inform line_table that file is being entered and is in line 1
+ line_map->start_file(filename, current_line);
+ }
-Lexer::~Lexer ()
-{
- /* ok apparently stop (which is equivalent of original code in destructor) is
- * meant to be called after all files have finished parsing, for cleanup. On
- * the other hand, actual code that it calls to leave a certain line map is
- * mentioned in GCC docs as being useful for "just leaving an included header"
- * and stuff like that, so this line mapping functionality may need fixing.
- * FIXME: find out whether this occurs. */
- // line_map->stop();
-}
+ Lexer::~Lexer() {
+ /* ok apparently stop (which is equivalent of original code in destructor) is meant to be
+ * called after all files have finished parsing, for cleanup. On the other hand, actual code
+ * that it calls to leave a certain line map is mentioned in GCC docs as being useful for
+ * "just leaving an included header" and stuff like that, so this line mapping functionality
+ * may need fixing.
+ * FIXME: find out whether this occurs. */
+ // line_map->stop();
+ }
-// TODO: need to optimise somehow to avoid the virtual function call in the
-// tight loop. Best idea at the moment is CRTP, but that might make lexer
-// implementation annoying when storing the "base class" (i.e. would need
-// template parameter everywhere), although in practice it would mostly just
-// look ugly and make enclosing classes like Parser also require a type
-// parameter. At this point a macro might be better. OK I guess macros can be
-// replaced by constexpr if or something if possible.
-Location
-Lexer::get_current_location ()
-{
- return line_map->get_location (current_column);
-}
+ // TODO: need to optimise somehow to avoid the virtual function call in the tight loop.
+ // Best idea at the moment is CRTP, but that might make lexer implementation annoying when storing
+ // the "base class" (i.e. would need template parameter everywhere), although in practice it would
+ // mostly just look ugly and make enclosing classes like Parser also require a type parameter.
+ // At this point a macro might be better.
+ // OK I guess macros can be replaced by constexpr if or something if possible.
+ Location Lexer::get_current_location() {
+ return line_map->get_location(current_column);
+ }
-int
-Lexer::peek_input (int n)
-{
- return input_queue.peek (n);
-}
+ int Lexer::peek_input(int n) {
+ return input_queue.peek(n);
+ }
-int
-Lexer::peek_input ()
-{
- return peek_input (0);
-}
+ int Lexer::peek_input() {
+ return peek_input(0);
+ }
-void
-Lexer::skip_input (int n)
-{
- input_queue.skip (n);
-}
+ void Lexer::skip_input(int n) {
+ input_queue.skip(n);
+ }
-void
-Lexer::skip_input ()
-{
- skip_input (0);
-}
+ void Lexer::skip_input() {
+ skip_input(0);
+ }
-const_TokenPtr
-Lexer::peek_token (int n)
-{
- return token_queue.peek (n);
-}
+ const_TokenPtr Lexer::peek_token(int n) {
+ return token_queue.peek(n);
+ }
-const_TokenPtr
-Lexer::peek_token ()
-{
- return peek_token (0);
-}
+ const_TokenPtr Lexer::peek_token() {
+ return peek_token(0);
+ }
-void
-Lexer::skip_token (int n)
-{
- token_queue.skip (n);
-}
+ void Lexer::skip_token(int n) {
+ token_queue.skip(n);
+ }
-void
-Lexer::skip_token ()
-{
- skip_token (0);
-}
+ void Lexer::skip_token() {
+ skip_token(0);
+ }
-void
-Lexer::replace_current_token (TokenPtr replacement)
-{
- token_queue.replace_current_value (replacement);
-}
+ void Lexer::replace_current_token(TokenPtr replacement) {
+ token_queue.replace_current_value(replacement);
+ }
-/* shitty anonymous namespace that can only be accessed inside the compilation
- * unit - used for classify_keyword Binary search in sorted array of keywords
- * created with x-macros. */
-namespace {
-const std::string keyword_index[] = {
+ /* shitty anonymous namespace that can only be accessed inside the compilation unit - used for
+ * classify_keyword
+ * Binary search in sorted array of keywords created with x-macros. */
+ namespace {
+ const std::string keyword_index[] = {
#define RS_TOKEN(x, y)
#define RS_TOKEN_KEYWORD(name, keyword) keyword,
- RS_TOKEN_LIST
+ RS_TOKEN_LIST
#undef RS_TOKEN_KEYWORD
#undef RS_TOKEN
-};
+ };
-TokenId keyword_keys[] = {
+ TokenId keyword_keys[] = {
#define RS_TOKEN(x, y)
#define RS_TOKEN_KEYWORD(name, keyword) name,
- RS_TOKEN_LIST
+ RS_TOKEN_LIST
#undef RS_TOKEN_KEYWORD
#undef RS_TOKEN
-};
-
-const int num_keywords = sizeof (keyword_index) / sizeof (*keyword_index);
-} // namespace
-
-/* Determines whether the string passed in is a keyword or not. If it is, it
- * returns the keyword name. */
-TokenId
-Lexer::classify_keyword (const std::string &str)
-{
- const std::string *last = keyword_index + num_keywords;
- const std::string *idx = std::lower_bound (keyword_index, last, str);
-
- if (idx == last || str != *idx)
- {
- return IDENTIFIER;
- }
- else
- {
- return keyword_keys[idx - keyword_index];
- }
-}
+ };
-TokenPtr
-Lexer::build_token ()
-{
- // loop to go through multiple characters to build a single token
- while (true)
- {
- Location loc = get_current_location ();
- /*int */ current_char = peek_input ();
- skip_input ();
-
- // return end of file token if end of file
- if (current_char == EOF)
- {
- return Token::make (END_OF_FILE, loc);
- }
-
- // detect shebang
- if (loc == 1 && current_line == 1 && current_char == '#')
- {
- current_char = peek_input ();
-
- if (current_char == '!')
- {
- skip_input ();
- current_char = peek_input ();
-
- switch (current_char)
- {
- case '/':
- // shebang
-
- skip_input ();
-
- // ignore rest of line
- while (current_char != '\n')
- {
- current_char = peek_input ();
- skip_input ();
- }
-
- // newline
- current_line++;
- current_column = 1;
- // tell line_table that new line starts
- line_map->start_line (current_line, max_column_hint);
- continue;
- }
- }
- }
-
- // if not end of file, start tokenising
- switch (current_char)
- {
- // ignore whitespace characters for tokens but continue updating
- // location
- case '\n': // newline
- current_line++;
- current_column = 1;
- // tell line_table that new line starts
- linemap_line_start (::line_table, current_line, max_column_hint);
- continue;
- case ' ': // space
- current_column++;
- continue;
- case '\t': // tab
- // width of a tab is not well-defined, assume 8 spaces
- current_column += 8;
- continue;
-
- // punctuation - actual tokens
- case '=':
- if (peek_input () == '>')
- {
- // match arm arrow
- skip_input ();
- current_column += 2;
-
- return Token::make (MATCH_ARROW, loc);
- }
- else if (peek_input () == '=')
- {
- // equality operator
- skip_input ();
- current_column += 2;
-
- return Token::make (EQUAL_EQUAL, loc);
- }
- else
- {
- // assignment operator
- current_column++;
- return Token::make (EQUAL, loc);
- }
- case '(':
- current_column++;
- return Token::make (LEFT_PAREN, loc);
- case '-':
- if (peek_input () == '>')
- {
- // return type specifier
- skip_input ();
- current_column += 2;
-
- return Token::make (RETURN_TYPE, loc);
- }
- else if (peek_input () == '=')
- {
- // minus-assign
- skip_input ();
- current_column += 2;
-
- return Token::make (MINUS_EQ, loc);
- }
- else
- {
- // minus
- current_column++;
- return Token::make (MINUS, loc);
- }
- case '+':
- if (peek_input () == '=')
- {
- // add-assign
- skip_input ();
- current_column += 2;
-
- return Token::make (PLUS_EQ, loc);
- }
- else
- {
- // add
- current_column++;
- return Token::make (PLUS, loc);
- }
- case ')':
- current_column++;
- return Token::make (RIGHT_PAREN, loc);
- case ';':
- current_column++;
- return Token::make (SEMICOLON, loc);
- case '*':
- if (peek_input () == '=')
- {
- // multiplication-assign
- skip_input ();
- current_column += 2;
-
- return Token::make (ASTERISK_EQ, loc);
- }
- else
- {
- // multiplication
- current_column++;
- return Token::make (ASTERISK, loc);
- }
- case ',':
- current_column++;
- return Token::make (COMMA, loc);
- case '/':
- if (peek_input () == '=')
- {
- // division-assign
- skip_input ();
- current_column += 2;
-
- return Token::make (DIV_EQ, loc);
- }
- else if (peek_input () == '/')
- {
- // TODO: single-line doc comments
-
- // single line comment
- skip_input ();
- current_column += 2;
-
- // basically ignore until line finishes
- while (current_char != '\n' && current_char != EOF)
- {
- skip_input ();
- current_column++; // not used
- current_char = peek_input ();
- }
- continue;
- break;
- }
- else if (peek_input () == '*')
- {
- // block comment
- skip_input ();
- current_column += 2;
-
- // TODO: block doc comments
-
- current_char = peek_input ();
-
- int level = 1;
- while (level > 0)
- {
- skip_input ();
- current_column++; // for error-handling
- current_char = peek_input ();
-
- // if /* found
- if (current_char == '/')
- {
- if (peek_input (1) == '*')
- {
- // skip /* characters
- skip_input (1);
-
- current_column += 2;
-
- level += 1;
- }
- }
-
- // ignore until */ is found
- if (current_char == '*')
- {
- if (peek_input (1) == '/')
- {
- // skip */ characters
- skip_input (1);
-
- current_column += 2;
- // should only break inner loop here - seems to do so
- // break;
-
- level -= 1;
- }
- }
- }
-
- // refresh new token
- continue;
- break;
- }
- else
- {
- // division
- current_column++;
- return Token::make (DIV, loc);
- }
- case '%':
- if (peek_input () == '=')
- {
- // modulo-assign
- current_column += 2;
- return Token::make (PERCENT_EQ, loc);
- }
- else
- {
- // modulo
- current_column++;
- return Token::make (PERCENT, loc);
- }
- case '^':
- if (peek_input () == '=')
- {
- // xor-assign?
- current_column += 2;
- return Token::make (CARET_EQ, loc);
- }
- else
- {
- // xor?
- current_column++;
- return Token::make (CARET, loc);
- }
- case '<':
- if (peek_input () == '<')
- {
- if (peek_input (1) == '=')
- {
- // left-shift assign
- skip_input (1);
- current_column += 3;
-
- return Token::make (LEFT_SHIFT_EQ, loc);
- }
- else
- {
- // left-shift
- skip_input ();
- current_column += 2;
-
- return Token::make (LEFT_SHIFT, loc);
- }
- }
- else if (peek_input () == '=')
- {
- // smaller than or equal to
- skip_input ();
- current_column += 2;
-
- return Token::make (LESS_OR_EQUAL, loc);
- }
- else
- {
- // smaller than
- current_column++;
- return Token::make (LEFT_ANGLE, loc);
- }
- break;
- case '>':
- if (peek_input () == '>')
- {
- if (peek_input (1) == '=')
- {
- // right-shift-assign
- skip_input (1);
- current_column += 3;
-
- return Token::make (RIGHT_SHIFT_EQ, loc);
- }
- else
- {
- // right-shift
- skip_input ();
- current_column += 2;
-
- return Token::make (RIGHT_SHIFT, loc);
- }
- }
- else if (peek_input () == '=')
- {
- // larger than or equal to
- skip_input ();
- current_column += 2;
-
- return Token::make (GREATER_OR_EQUAL, loc);
- }
- else
- {
- // larger than
- current_column++;
- return Token::make (RIGHT_ANGLE, loc);
- }
- case ':':
- if (peek_input () == ':')
- {
- // scope resolution ::
- skip_input ();
- current_column += 2;
-
- return Token::make (SCOPE_RESOLUTION, loc);
- }
- else
- {
- // single colon :
- current_column++;
- return Token::make (COLON, loc);
- }
- case '!':
- // no special handling for macros in lexer?
- if (peek_input () == '=')
- {
- // not equal boolean operator
- skip_input ();
- current_column += 2;
-
- return Token::make (NOT_EQUAL, loc);
- }
- else
- {
- // not equal unary operator
- current_column++;
-
- return Token::make (EXCLAM, loc);
- }
- case '?':
- current_column++;
- return Token::make (QUESTION_MARK, loc);
- case '#':
- current_column++;
- return Token::make (HASH, loc);
- case '[':
- current_column++;
- return Token::make (LEFT_SQUARE, loc);
- case ']':
- current_column++;
- return Token::make (RIGHT_SQUARE, loc);
- case '{':
- current_column++;
- return Token::make (LEFT_CURLY, loc);
- case '}':
- current_column++;
- return Token::make (RIGHT_CURLY, loc);
- case '@':
- // TODO: i don't know what this does, does it need special handling?
- current_column++;
- return Token::make (PATTERN_BIND, loc);
- case '$':
- // TODO: i don't know what this does, does it need special handling?
- current_column++;
- return Token::make (DOLLAR_SIGN, loc);
- case '~':
- // TODO: i don't know what this does, does it need special handling?
- current_column++;
- return Token::make (TILDE, loc);
- case '\\':
- // TODO: i don't know what this does, does it need special handling?
- current_column++;
- return Token::make (BACKSLASH, loc);
- case '`':
- // TODO: i don't know what this does, does it need special handling?
- current_column++;
- return Token::make (BACKTICK, loc);
- case '|':
- if (peek_input () == '=')
- {
- // bitwise or-assign?
- skip_input ();
- current_column += 2;
-
- return Token::make (PIPE_EQ, loc);
- }
- else if (peek_input () == '|')
- {
- // logical or
- skip_input ();
- current_column += 2;
-
- return Token::make (OR, loc);
- }
- else
- {
- // bitwise or
- current_column++;
-
- return Token::make (PIPE, loc);
- }
- case '&':
- if (peek_input () == '=')
- {
- // bitwise and-assign?
- skip_input ();
- current_column += 2;
-
- return Token::make (AMP_EQ, loc);
- }
- else if (peek_input () == '&')
- {
- // logical and
- skip_input ();
- current_column += 2;
-
- return Token::make (LOGICAL_AND, loc);
- }
- else
- {
- // bitwise and/reference
- current_column++;
-
- return Token::make (AMP, loc);
- }
- case '.':
- if (peek_input () == '.')
- {
- if (peek_input (1) == '.')
- {
- // ellipsis
- skip_input (1);
- current_column += 3;
-
- return Token::make (ELLIPSIS, loc);
- }
- else if (peek_input (1) == '=')
- {
- // ..=
- skip_input (1);
- current_column += 3;
-
- return Token::make (DOT_DOT_EQ, loc);
- }
- else
- {
- // ..
- skip_input ();
- current_column += 2;
-
- return Token::make (DOT_DOT, loc);
- }
- }
- else if (!ISDIGIT (peek_input ()))
- {
- // single dot .
- // Only if followed by a non-number
- current_column++;
- return Token::make (DOT, loc);
- }
- }
- // TODO: special handling of _ in the lexer? instead of being identifier
-
- // byte and byte string test
- if (current_char == 'b')
- {
- if (peek_input () == '\'')
- {
- // byte - allows any ascii or escapes
- // would also have to take into account escapes: \x hex_digit
- // hex_digit, \n, \r, \t, \\, \0
-
- int length = 1;
-
- // char to save
- char byte_char;
-
- skip_input ();
- // make current char the next character
- current_char = peek_input ();
-
- // detect escapes
- if (current_char == '\\')
- {
- /*skip_input();
-
- // make current_char next character (letter)
- current_char = peek_input();*/
-
- parse_escape (length, byte_char, '\'');
-
- if (byte_char > 127)
- {
- rust_error_at (get_current_location (),
- "byte char '%c' out of range", byte_char);
- byte_char = 0;
- }
-
- // skip_input();
- current_char = peek_input ();
- length++;
-
- if (current_char != '\'')
- {
- rust_error_at (get_current_location (),
- "unclosed byte char");
- }
-
- // TODO: ensure skipping is needed here
- skip_input ();
- current_char = peek_input ();
- length++; // go to next char
- }
- else if (current_char != '\'')
- {
- // otherwise, get character from direct input character
- byte_char = current_char;
-
- skip_input ();
- current_char = peek_input ();
-
- if (current_char != '\'')
- {
- rust_error_at (get_current_location (),
- "unclosed byte char");
- }
-
- // TODO: ensure skipping is needed here
- skip_input ();
- current_char = peek_input ();
- length++; // go to next char
- }
- else
- {
- rust_error_at (get_current_location (),
- "no character inside '' for byte char");
- }
-
- current_column += length;
-
- return Token::make_byte_char (loc, byte_char);
- }
- else if (peek_input () == '"')
- {
- // byte string
-
- // skip quote character
- skip_input ();
-
- std::string str;
- str.reserve (16); // some sensible default
-
- int length = 1;
- current_char = peek_input ();
- // TODO: handle escapes properly
-
- while (current_char != '"' && current_char != '\n')
- {
- if (current_char == '\\')
- {
- char output_char = 0;
- parse_escape (length, output_char, '"');
-
- if (output_char > 127)
- {
- rust_error_at (
- get_current_location (),
- "char '%c' in byte string out of range",
- output_char);
- output_char = 0;
- }
-
- str += output_char;
-
- continue;
- }
-
- length++;
-
- str += current_char;
- skip_input ();
- current_char = peek_input ();
- }
-
- current_column += length;
-
- if (current_char == '\n')
- {
- rust_error_at (get_current_location (),
- "unended byte string literal");
- }
- else if (current_char == '"')
- {
- skip_input ();
- current_char = peek_input ();
- }
- else
- {
- gcc_unreachable ();
- }
-
- return Token::make_byte_string (loc, str);
- // TODO: ensure escapes and string continue work properly
- }
- else if (peek_input () == 'r'
- && (peek_input (1) == '#' || peek_input (1) == '"'))
- {
- // raw byte string literals
- std::string str;
- str.reserve (16); // some sensible default
-
- int length = 1;
- int hash_count = 0;
-
- // get hash count at beginnning
- skip_input ();
- current_char = peek_input ();
- while (current_char == '#')
- {
- hash_count++;
- length++;
-
- skip_input ();
- current_char = peek_input ();
- }
-
- if (current_char != '"')
- {
- rust_error_at (get_current_location (),
- "raw byte string has no opening '\"'");
- }
-
- skip_input ();
- current_char = peek_input ();
-
- while (true)
- {
- if (current_char == '"')
- {
- bool enough_hashes = true;
-
- for (int i = 0; i < hash_count; i++)
- {
- if (peek_input (i + 1) != '#')
- {
- enough_hashes = false; // could continue here -
- // improve performance
- }
- }
-
- if (enough_hashes)
- {
- // skip enough input and peek enough input
- skip_input (hash_count); // is this enough?
- current_char = peek_input ();
- length += hash_count + 1;
- break;
- }
- }
-
- length++;
-
- str += current_char;
- skip_input ();
- current_char = peek_input ();
- }
-
- current_column += length;
-
- // TODO: does this work properly
- return Token::make_byte_string (loc, str);
- }
- }
-
- // raw stuff
- if (current_char == 'r')
- {
- int peek = peek_input ();
- int peek1 = peek_input (1);
-
- if (peek == '#' && (ISALPHA (peek1) || peek1 == '_'))
- {
- // raw identifier
- std::string str;
- str.reserve (16); // default
-
- skip_input ();
- current_char = peek_input ();
-
- current_column += 2;
-
- str += current_char;
-
- bool first_is_underscore = current_char == '_';
-
- int length = 1;
- current_char = peek_input ();
- // loop through entire name
- while (ISALPHA (current_char) || ISDIGIT (current_char)
- || current_char == '_')
- {
- length++;
-
- str += current_char;
- skip_input ();
- current_char = peek_input ();
- }
-
- current_column += length;
-
- // if just a single underscore, not an identifier
- if (first_is_underscore && length == 1)
- {
- rust_error_at (get_current_location (),
- "'_' is not a valid raw identifier");
- }
-
- if (str == "crate" || str == "extern" || str == "self"
- || str == "super" || str == "Self")
- {
- rust_error_at (get_current_location (),
- "'%s' is a forbidden raw identifier",
- str.c_str ());
- }
- else
- {
- return Token::make_identifier (loc, str);
- }
- }
- else if (peek == '"'
- || (peek == '#' && (ISALPHA (peek1) || peek1 == '_')))
- {
- // raw string literals
- std::string str;
- str.reserve (16); // some sensible default
-
- int length = 1;
- int hash_count = 0;
-
- // get hash count at beginnning
- current_char = peek;
- while (current_char == '#')
- {
- hash_count++;
- length++;
-
- skip_input ();
- current_char = peek_input ();
- }
-
- if (current_char != '"')
- {
- rust_error_at (get_current_location (),
- "raw string has no opening '\"'");
- }
-
- skip_input ();
- Codepoint current_char32 = test_peek_codepoint_input ();
-
- while (true)
- {
- if (current_char32.value == '"')
- {
- bool enough_hashes = true;
-
- for (int i = 0; i < hash_count; i++)
- {
- // if (test_peek_codepoint_input(i + 1) != '#') {
- // TODO: ensure this is a good enough replacement
- if (peek_input (i + 1) != '#')
- {
- enough_hashes = false; // could continue here -
- // improve performance
- }
- }
-
- if (enough_hashes)
- {
- // skip enough input and peek enough input
- skip_input (hash_count); // is this enough?
- current_char = peek_input ();
- length += hash_count + 1;
- break;
- }
- }
-
- length++;
-
- str += current_char32;
- test_skip_codepoint_input ();
- current_char32 = test_peek_codepoint_input ();
- }
-
- current_column += length;
-
- // TODO: does this work properly
- return Token::make_string (loc, str);
- }
- }
-
- // find identifiers and keywords
- if (ISALPHA (current_char) || current_char == '_')
- {
- std::string str;
- str.reserve (16); // default
- str += current_char;
-
- bool first_is_underscore = current_char == '_';
-
- int length = 1;
- current_char = peek_input ();
- // loop through entire name
- while (ISALPHA (current_char) || ISDIGIT (current_char)
- || current_char == '_')
- {
- length++;
-
- str += current_char;
- skip_input ();
- current_char = peek_input ();
- }
-
- current_column += length;
-
- // if just a single underscore, not an identifier
- if (first_is_underscore && length == 1)
- {
- return Token::make (UNDERSCORE, loc);
- }
-
- TokenId keyword = classify_keyword (str);
- if (keyword == IDENTIFIER)
- {
- return Token::make_identifier (loc, str);
- }
- else
- {
- return Token::make (keyword, loc);
- }
- }
-
- // identify literals
- // int or float literals - not processed properly
- if (ISDIGIT (current_char) || current_char == '.')
- { // _ not allowed as first char
- std::string str;
- str.reserve (16); // some sensible default
- str += current_char;
-
- PrimitiveCoreType type_hint = CORETYPE_UNKNOWN;
-
- bool is_real = (current_char == '.');
-
- int length = 1;
-
- // handle binary, octal, hex literals
- if (current_char == '0' && !ISDIGIT (peek_input ()))
- {
- current_char = peek_input ();
-
- if (current_char == 'x')
- {
- // hex (integer only)
-
- skip_input ();
- current_char = peek_input ();
-
- length++;
-
- // add 'x' to string after 0 so it is 0xFFAA or whatever
- str += 'x';
-
- // loop through to add entire hex number to string
- while (is_x_digit (current_char) || current_char == '_')
- {
- if (current_char == '_')
- {
- // don't add _ to number
- skip_input ();
- current_char = peek_input ();
-
- length++;
-
- continue;
- }
-
- length++;
-
- // add raw hex numbers
- str += current_char;
- skip_input ();
- current_char = peek_input ();
- }
-
- current_column += length;
-
- // convert hex value to decimal representation
- long hex_num = ::std::strtol (str.c_str (), NULL, 16);
-
- // create output string stream for hex value to be converted
- // to string again
- // TODO: if too slow, use sprintf
- ::std::ostringstream ostr;
- ostr << hex_num;
-
- // reassign string representation to converted value
- str = ostr.str ();
-
- // parse in type suffix if it exists
- parse_in_type_suffix (/*current_char, */ type_hint, length);
-
- if (type_hint == CORETYPE_F32 || type_hint == CORETYPE_F64)
- {
- rust_error_at (
- get_current_location (),
- "invalid type suffix '%s' for integer (hex) literal",
- get_type_hint_string (type_hint));
- }
- }
- else if (current_char == 'o')
- {
- // octal (integer only)
-
- skip_input ();
- current_char = peek_input ();
-
- length++;
-
- // don't add any characters as C octals are just 0124 or
- // whatever
-
- // loop through to add entire octal number to string
- while (is_octal_digit (current_char) || current_char == '_')
- {
- if (current_char == '_')
- {
- // don't add _ to number
- skip_input ();
- current_char = peek_input ();
-
- length++;
-
- continue;
- }
-
- length++;
-
- // add raw octal numbers
- str += current_char;
- skip_input ();
- current_char = peek_input ();
- }
-
- current_column += length;
-
- // convert octal value to decimal representation
- long octal_num = ::std::strtol (str.c_str (), NULL, 8);
-
- // create output string stream for octal value to be converted
- // to string again
- // TODO: if too slow, use sprintf
- ::std::ostringstream ostr;
- ostr << octal_num;
-
- // reassign string representation to converted value
- str = ostr.str ();
-
- // parse in type suffix if it exists
- parse_in_type_suffix (/*current_char, */ type_hint, length);
-
- if (type_hint == CORETYPE_F32 || type_hint == CORETYPE_F64)
- {
- rust_error_at (
- get_current_location (),
- "invalid type suffix '%s' for integer (octal) literal",
- get_type_hint_string (type_hint));
- }
- }
- else if (current_char == 'b')
- {
- // binary (integer only)
-
- skip_input ();
- current_char = peek_input ();
-
- length++;
-
- // don't add any characters as C binary numbers are not really
- // supported
-
- // loop through to add entire binary number to string
- while (is_bin_digit (current_char) || current_char == '_')
- {
- if (current_char == '_')
- {
- // don't add _ to number
- skip_input ();
- current_char = peek_input ();
-
- length++;
-
- continue;
- }
-
- length++;
-
- // add raw binary numbers
- str += current_char;
- skip_input ();
- current_char = peek_input ();
- }
-
- current_column += length;
-
- // convert binary value to decimal representation
- long bin_num = ::std::strtol (str.c_str (), NULL, 2);
-
- // create output string stream for binary value to be
- // converted to string again
- // TODO: if too slow, use sprintf
- ::std::ostringstream ostr;
- ostr << bin_num;
-
- // reassign string representation to converted value
- str = ostr.str ();
-
- // parse in type suffix if it exists
- parse_in_type_suffix (/*current_char, */ type_hint, length);
-
- if (type_hint == CORETYPE_F32 || type_hint == CORETYPE_F64)
- {
- rust_error_at (
- get_current_location (),
- "invalid type suffix '%s' for integer (binary) literal",
- get_type_hint_string (type_hint));
- }
- }
- }
- else
- {
- // handle decimals (integer or float)
-
- current_char = peek_input ();
-
- // parse initial decimal literal - assuming integer
- // TODO: test if works
- parse_in_decimal (/*current_char, */ str, length);
-
- // detect float literal - TODO: fix: "242." is not recognised as a
- // float literal
- if (current_char == '.' && is_float_digit (peek_input (1)))
- {
- // float with a '.', parse another decimal into it
-
- is_real = true;
-
- // add . to str
- str += current_char;
- skip_input ();
- current_char = peek_input ();
-
- length++;
-
- // parse another decimal number for float
- // TODO: test if works
- parse_in_decimal (/*current_char, */ str, length);
-
- // parse in exponent part if it exists
- // test to see if this works:
- parse_in_exponent_part (/*current_char, */ str, length);
-
- // parse in type suffix if it exists
- // TODO: see if works:
- parse_in_type_suffix (/*current_char, */ type_hint, length);
-
- if (type_hint != CORETYPE_F32 && type_hint != CORETYPE_F64
- && type_hint != CORETYPE_UNKNOWN)
- {
- rust_error_at (
- get_current_location (),
- "invalid type suffix '%s' for float literal",
- get_type_hint_string (type_hint));
- }
- }
- else if (current_char == '.'
- && check_valid_float_dot_end (peek_input (1)))
- {
- is_real = true;
-
- // add . to str
- str += current_char;
- skip_input ();
- current_char = peek_input ();
- length++;
-
- // add a '0' after the . to stop ambiguity
- str += '0';
-
- // don't parse another decimal number for float
-
- // parse in exponent part if it exists - shouldn't exist?
- // parse_in_exponent_part(/*current_char, */ str, length);
-
- // parse in type suffix if it exists - shouldn't exist?
- // TODO: see if works:
- // parse_in_type_suffix(/*current_char, */ type_hint, length);
-
- if (type_hint != CORETYPE_F32 && type_hint != CORETYPE_F64
- && type_hint != CORETYPE_UNKNOWN)
- {
- rust_error_at (
- get_current_location (),
- "invalid type suffix '%s' for float literal",
- get_type_hint_string (type_hint));
- }
- }
- else if (current_char == 'E' || current_char == 'e')
- {
- is_real = true;
-
- // parse exponent part
- parse_in_exponent_part (/*current_char, */ str, length);
-
- // parse in type suffix if it exists
- parse_in_type_suffix (/*current_char, */ type_hint, length);
-
- if (type_hint != CORETYPE_F32 && type_hint != CORETYPE_F64
- && type_hint != CORETYPE_UNKNOWN)
- {
- rust_error_at (
- get_current_location (),
- "invalid type suffix '%s' for float literal",
- get_type_hint_string (type_hint));
- }
- }
- else
- {
- // is an integer
-
- // parse in type suffix if it exists
- parse_in_type_suffix (/*current_char, */ type_hint, length);
-
- if (type_hint == CORETYPE_F32 || type_hint == CORETYPE_F64)
- {
- rust_error_at (get_current_location (),
- "invalid type suffix '%s' for integer "
- "(decimal) literal",
- get_type_hint_string (type_hint));
- }
- }
-
- current_column += length;
- }
-
- // actually make the tokens
- if (is_real)
- {
- return Token::make_float (loc, str, type_hint);
- }
- else
- {
- return Token::make_int (loc, str, type_hint);
- }
- }
-
- // string literals - not processed properly
- if (current_char == '"')
- {
- Codepoint current_char32;
-
- std::string str;
- str.reserve (16); // some sensible default
-
- int length = 1;
- current_char32 = test_peek_codepoint_input ();
-
- // ok initial peek_codepoint seems to work without "too long"
-
- while (current_char32.value != '\n' && current_char32.value != '"')
- {
- // TODO: handle escapes and string continue
- if (current_char32.value == '\\')
- {
- // parse escape
- parse_utf8_escape (length, current_char32, '\'');
-
- // TODO: find a way to parse additional characters after the
- // escape? return after parsing escape?
-
- str += current_char32;
-
- // required as parsing utf8 escape only changes current_char
- // or something
- current_char32 = test_peek_codepoint_input ();
-
- continue;
- }
-
- length += test_get_input_codepoint_length ();
-
- // does this work? not technically a char. maybe have to convert
- // to char series
- str += current_char32;
- test_skip_codepoint_input ();
- current_char32 = test_peek_codepoint_input ();
- }
-
- current_column += length;
-
- if (current_char32.value == '\n')
- {
- rust_error_at (get_current_location (), "unended string literal");
- }
- else if (current_char32.value == '"')
- {
- skip_input ();
-
- current_char = peek_input ();
- }
- else
- {
- gcc_unreachable ();
- }
-
- return Token::make_string (loc, str);
- // TODO: account for escapes and string continue
- // also, in rust a string is a series of unicode characters (4 bytes)
- }
-
- // char literal attempt
- if (current_char == '\'')
- {
- // rust chars are 4 bytes and have some weird unicode representation
- // thing
- Codepoint current_char32;
-
- int length = 1;
-
- current_char32 = test_peek_codepoint_input ();
-
- // parse escaped char literal
- if (current_char32.value == '\\')
- {
- // parse escape
- parse_utf8_escape (length, current_char32, '\'');
-
- // TODO - this skip may not be needed?
- // test_skip_codepoint_input();
-
- if (test_peek_codepoint_input ().value != '\'')
- {
- rust_error_at (get_current_location (),
- "unended char literal");
- }
- else
- {
- test_skip_codepoint_input ();
- current_char = peek_input ();
- length++;
- }
-
- current_column += length;
-
- // TODO: FIX - char is actually 4 bytes in Rust (uint32) due to
- // unicode
- return Token::make_char (loc, current_char32);
- }
- else
- {
- // current_char32 = test_peek_codepoint_input();
- test_skip_codepoint_input ();
-
- if (test_peek_codepoint_input ().value == '\'')
- {
- // parse normal char literal
- // TODO: FIX - char is actually 4 bytes in Rust (uint32) due
- // to unicode
-
- // skip the ' character
- skip_input ();
- current_char = peek_input ();
-
- // TODO fix due to different widths of utf-8 chars
- current_column += 3;
-
- return Token::make_char (loc, current_char32);
- }
- else if (ISDIGIT (current_char32.value)
- || ISALPHA (current_char32.value)
- || current_char32.value == '_')
- {
- // parse lifetime name
- ::std::string str;
- // TODO: does this work properly?
- str += current_char32;
-
- // TODO: fix lifetime name thing - actually, why am I even
- // using utf-8 here?
-
- int length = 1;
-
- current_char32 = test_peek_codepoint_input ();
-
- while (ISDIGIT (current_char32.value)
- || ISALPHA (current_char32.value)
- || current_char32.value == '_')
- {
- length += test_get_input_codepoint_length ();
-
- str += current_char32;
- test_skip_codepoint_input ();
- current_char32 = test_peek_codepoint_input ();
- }
-
- current_column += length;
-
- return Token::make_lifetime (loc, str);
- }
- else
- {
- rust_error_at (get_current_location (),
- "expected ' after character constant");
- }
- }
- }
-
- // didn't match anything so error
- rust_error_at (loc, "unexpected character '%x'", current_char);
- current_column++;
+ const int num_keywords = sizeof(keyword_index) / sizeof(*keyword_index);
}
-}
-// Shitty pass-by-reference way of parsing in type suffix.
-bool
-Lexer::parse_in_type_suffix (
- /*char& current_char, */ PrimitiveCoreType &type_hint, int &length)
-{
- ::std::string suffix;
- suffix.reserve (5);
-
- // get suffix
- while (ISALPHA (current_char) || ISDIGIT (current_char)
- || current_char == '_')
- {
- if (current_char == '_')
- {
- // don't add _ to suffix
- skip_input ();
- current_char = peek_input ();
-
- length++;
-
- continue;
- }
-
- length++;
-
- suffix += current_char;
- skip_input ();
- current_char = peek_input ();
- }
+ /* Looks str up in keyword_index. Returns the keyword_keys entry at the matching position when
+ * str is found there, or IDENTIFIER when it is not. */
+ TokenId Lexer::classify_keyword(const std::string& str) {
+ const std::string* last = keyword_index + num_keywords; // one past the final keyword_index entry
+ const std::string* idx = std::lower_bound(keyword_index, last, str); // first entry not less than str; presumably keyword_index is sorted - TODO confirm
- if (suffix.empty ())
- {
- // no type suffix: do nothing but also no error
- return false;
- }
- else if (suffix == "f32")
- {
- type_hint = CORETYPE_F32;
- }
- else if (suffix == "f64")
- {
- type_hint = CORETYPE_F64;
- }
- else if (suffix == "i8")
- {
- type_hint = CORETYPE_I8;
- }
- else if (suffix == "i16")
- {
- type_hint = CORETYPE_I16;
- }
- else if (suffix == "i32")
- {
- type_hint = CORETYPE_I32;
- }
- else if (suffix == "i64")
- {
- type_hint = CORETYPE_I64;
- }
- else if (suffix == "i128")
- {
- type_hint = CORETYPE_I128;
- }
- else if (suffix == "isize")
- {
- type_hint = CORETYPE_ISIZE;
- }
- else if (suffix == "u8")
- {
- type_hint = CORETYPE_U8;
- }
- else if (suffix == "u16")
- {
- type_hint = CORETYPE_U16;
- }
- else if (suffix == "u32")
- {
- type_hint = CORETYPE_U32;
- }
- else if (suffix == "u64")
- {
- type_hint = CORETYPE_U64;
- }
- else if (suffix == "u128")
- {
- type_hint = CORETYPE_U128;
+ if (idx == last || str != *idx) { // ran off the end, or the entry found is not an exact match
+ return IDENTIFIER;
+ } else {
+ return keyword_keys[idx - keyword_index]; // idx's offset into keyword_index selects the keyword_keys entry
+ }
+ }
- else if (suffix == "usize")
- {
- type_hint = CORETYPE_USIZE;
+
+ TokenPtr Lexer::build_token() {
+ // loop to go through multiple characters to build a single token
+ while (true) {
+ Location loc = get_current_location();
+ /*int */ current_char = peek_input();
+ skip_input();
+
+ // return end of file token if end of file
+ if (current_char == EOF) {
+ return Token::make(END_OF_FILE, loc);
+ }
+
+ // detect shebang
+ if (loc == 1 && current_line == 1 && current_char == '#') {
+ current_char = peek_input();
+
+ if (current_char == '!') {
+ skip_input();
+ current_char = peek_input();
+
+ switch (current_char) {
+ case '/':
+ // shebang
+
+ skip_input();
+
+ // ignore rest of line
+ while (current_char != '\n') {
+ current_char = peek_input();
+ skip_input();
+ }
+
+ // newline
+ current_line++;
+ current_column = 1;
+ // tell line_table that new line starts
+ line_map->start_line(current_line, max_column_hint);
+ continue;
+ }
+ }
+ }
+
+ // if not end of file, start tokenising
+ switch (current_char) {
+ // ignore whitespace characters for tokens but continue updating location
+ case '\n': // newline
+ current_line++;
+ current_column = 1;
+ // tell line_table that new line starts
+ linemap_line_start(::line_table, current_line, max_column_hint);
+ continue;
+ case ' ': // space
+ current_column++;
+ continue;
+ case '\t': // tab
+ // width of a tab is not well-defined, assume 8 spaces
+ current_column += 8;
+ continue;
+
+ // punctuation - actual tokens
+ case '=':
+ if (peek_input() == '>') {
+ // match arm arrow
+ skip_input();
+ current_column += 2;
+
+ return Token::make(MATCH_ARROW, loc);
+ } else if (peek_input() == '=') {
+ // equality operator
+ skip_input();
+ current_column += 2;
+
+ return Token::make(EQUAL_EQUAL, loc);
+ } else {
+ // assignment operator
+ current_column++;
+ return Token::make(EQUAL, loc);
+ }
+ case '(':
+ current_column++;
+ return Token::make(LEFT_PAREN, loc);
+ case '-':
+ if (peek_input() == '>') {
+ // return type specifier
+ skip_input();
+ current_column += 2;
+
+ return Token::make(RETURN_TYPE, loc);
+ } else if (peek_input() == '=') {
+ // minus-assign
+ skip_input();
+ current_column += 2;
+
+ return Token::make(MINUS_EQ, loc);
+ } else {
+ // minus
+ current_column++;
+ return Token::make(MINUS, loc);
+ }
+ case '+':
+ if (peek_input() == '=') {
+ // add-assign
+ skip_input();
+ current_column += 2;
+
+ return Token::make(PLUS_EQ, loc);
+ } else {
+ // add
+ current_column++;
+ return Token::make(PLUS, loc);
+ }
+ case ')':
+ current_column++;
+ return Token::make(RIGHT_PAREN, loc);
+ case ';':
+ current_column++;
+ return Token::make(SEMICOLON, loc);
+ case '*':
+ if (peek_input() == '=') {
+ // multiplication-assign
+ skip_input();
+ current_column += 2;
+
+ return Token::make(ASTERISK_EQ, loc);
+ } else {
+ // multiplication
+ current_column++;
+ return Token::make(ASTERISK, loc);
+ }
+ case ',':
+ current_column++;
+ return Token::make(COMMA, loc);
+ case '/':
+ if (peek_input() == '=') {
+ // division-assign
+ skip_input();
+ current_column += 2;
+
+ return Token::make(DIV_EQ, loc);
+ } else if (peek_input() == '/') {
+ // TODO: single-line doc comments
+
+ // single line comment
+ skip_input();
+ current_column += 2;
+
+ // basically ignore until line finishes
+ while (current_char != '\n' && current_char != EOF) {
+ skip_input();
+ current_column++; // not used
+ current_char = peek_input();
+ }
+ continue;
+ break;
+ } else if (peek_input() == '*') {
+ // block comment
+ skip_input();
+ current_column += 2;
+
+ // TODO: block doc comments
+
+ current_char = peek_input();
+
+ int level = 1;
+ while (level > 0) {
+ skip_input();
+ current_column++; // for error-handling
+ current_char = peek_input();
+
+ // if /* found
+ if (current_char == '/') {
+ if (peek_input(1) == '*') {
+ // skip /* characters
+ skip_input(1);
+
+ current_column += 2;
+
+ level += 1;
+ }
+ }
+
+ // ignore until */ is found
+ if (current_char == '*') {
+ if (peek_input(1) == '/') {
+ // skip */ characters
+ skip_input(1);
+
+ current_column += 2;
+ // should only break inner loop here - seems to do so
+ // break;
+
+ level -= 1;
+ }
+ }
+ }
+
+ // refresh new token
+ continue;
+ break;
+ } else {
+ // division
+ current_column++;
+ return Token::make(DIV, loc);
+ }
+ case '%':
+ if (peek_input() == '=') {
+ // modulo-assign
+ current_column += 2;
+ return Token::make(PERCENT_EQ, loc);
+ } else {
+ // modulo
+ current_column++;
+ return Token::make(PERCENT, loc);
+ }
+ case '^':
+ if (peek_input() == '=') {
+ // xor-assign?
+ current_column += 2;
+ return Token::make(CARET_EQ, loc);
+ } else {
+ // xor?
+ current_column++;
+ return Token::make(CARET, loc);
+ }
+ case '<':
+ if (peek_input() == '<') {
+ if (peek_input(1) == '=') {
+ // left-shift assign
+ skip_input(1);
+ current_column += 3;
+
+ return Token::make(LEFT_SHIFT_EQ, loc);
+ } else {
+ // left-shift
+ skip_input();
+ current_column += 2;
+
+ return Token::make(LEFT_SHIFT, loc);
+ }
+ } else if (peek_input() == '=') {
+ // smaller than or equal to
+ skip_input();
+ current_column += 2;
+
+ return Token::make(LESS_OR_EQUAL, loc);
+ } else {
+ // smaller than
+ current_column++;
+ return Token::make(LEFT_ANGLE, loc);
+ }
+ break;
+ case '>':
+ if (peek_input() == '>') {
+ if (peek_input(1) == '=') {
+ // right-shift-assign
+ skip_input(1);
+ current_column += 3;
+
+ return Token::make(RIGHT_SHIFT_EQ, loc);
+ } else {
+ // right-shift
+ skip_input();
+ current_column += 2;
+
+ return Token::make(RIGHT_SHIFT, loc);
+ }
+ } else if (peek_input() == '=') {
+ // larger than or equal to
+ skip_input();
+ current_column += 2;
+
+ return Token::make(GREATER_OR_EQUAL, loc);
+ } else {
+ // larger than
+ current_column++;
+ return Token::make(RIGHT_ANGLE, loc);
+ }
+ case ':':
+ if (peek_input() == ':') {
+ // scope resolution ::
+ skip_input();
+ current_column += 2;
+
+ return Token::make(SCOPE_RESOLUTION, loc);
+ } else {
+ // single colon :
+ current_column++;
+ return Token::make(COLON, loc);
+ }
+ case '!':
+ // no special handling for macros in lexer?
+ if (peek_input() == '=') {
+ // not equal boolean operator
+ skip_input();
+ current_column += 2;
+
+ return Token::make(NOT_EQUAL, loc);
+ } else {
+ // not equal unary operator
+ current_column++;
+
+ return Token::make(EXCLAM, loc);
+ }
+ case '?':
+ current_column++;
+ return Token::make(QUESTION_MARK, loc);
+ case '#':
+ current_column++;
+ return Token::make(HASH, loc);
+ case '[':
+ current_column++;
+ return Token::make(LEFT_SQUARE, loc);
+ case ']':
+ current_column++;
+ return Token::make(RIGHT_SQUARE, loc);
+ case '{':
+ current_column++;
+ return Token::make(LEFT_CURLY, loc);
+ case '}':
+ current_column++;
+ return Token::make(RIGHT_CURLY, loc);
+ case '@':
+ // TODO: i don't know what this does, does it need special handling?
+ current_column++;
+ return Token::make(PATTERN_BIND, loc);
+ case '$':
+ // TODO: i don't know what this does, does it need special handling?
+ current_column++;
+ return Token::make(DOLLAR_SIGN, loc);
+ case '~':
+ // TODO: i don't know what this does, does it need special handling?
+ current_column++;
+ return Token::make(TILDE, loc);
+ case '\\':
+ // TODO: i don't know what this does, does it need special handling?
+ current_column++;
+ return Token::make(BACKSLASH, loc);
+ case '`':
+ // TODO: i don't know what this does, does it need special handling?
+ current_column++;
+ return Token::make(BACKTICK, loc);
+ case '|':
+ if (peek_input() == '=') {
+ // bitwise or-assign?
+ skip_input();
+ current_column += 2;
+
+ return Token::make(PIPE_EQ, loc);
+ } else if (peek_input() == '|') {
+ // logical or
+ skip_input();
+ current_column += 2;
+
+ return Token::make(OR, loc);
+ } else {
+ // bitwise or
+ current_column++;
+
+ return Token::make(PIPE, loc);
+ }
+ case '&':
+ if (peek_input() == '=') {
+ // bitwise and-assign?
+ skip_input();
+ current_column += 2;
+
+ return Token::make(AMP_EQ, loc);
+ } else if (peek_input() == '&') {
+ // logical and
+ skip_input();
+ current_column += 2;
+
+ return Token::make(LOGICAL_AND, loc);
+ } else {
+ // bitwise and/reference
+ current_column++;
+
+ return Token::make(AMP, loc);
+ }
+ case '.':
+ if (peek_input() == '.') {
+ if (peek_input(1) == '.') {
+ // ellipsis
+ skip_input(1);
+ current_column += 3;
+
+ return Token::make(ELLIPSIS, loc);
+ } else if (peek_input(1) == '=') {
+ // ..=
+ skip_input(1);
+ current_column += 3;
+
+ return Token::make(DOT_DOT_EQ, loc);
+ } else {
+ // ..
+ skip_input();
+ current_column += 2;
+
+ return Token::make(DOT_DOT, loc);
+ }
+ } else if (!ISDIGIT(peek_input())) {
+ // single dot .
+ // Only if followed by a non-number
+ current_column++;
+ return Token::make(DOT, loc);
+ }
+ }
+ // TODO: special handling of _ in the lexer? instead of being identifier
+
+ // byte and byte string test
+ if (current_char == 'b') {
+ if (peek_input() == '\'') {
+ // byte - allows any ascii or escapes
+ // would also have to take into account escapes: \x hex_digit hex_digit,
+ // \n, \r, \t, \\, \0
+
+ int length = 1;
+
+ // char to save
+ char byte_char;
+
+ skip_input();
+ // make current char the next character
+ current_char = peek_input();
+
+ // detect escapes
+ if (current_char == '\\') {
+ /*skip_input();
+
+ // make current_char next character (letter)
+ current_char = peek_input();*/
+
+ parse_escape(length, byte_char, '\'');
+
+ if (byte_char > 127) {
+ rust_error_at(
+ get_current_location(), "byte char '%c' out of range", byte_char);
+ byte_char = 0;
+ }
+
+ // skip_input();
+ current_char = peek_input();
+ length++;
+
+ if (current_char != '\'') {
+ rust_error_at(get_current_location(), "unclosed byte char");
+ }
+
+ // TODO: ensure skipping is needed here
+ skip_input();
+ current_char = peek_input();
+ length++; // go to next char
+ } else if (current_char != '\'') {
+ // otherwise, get character from direct input character
+ byte_char = current_char;
+
+ skip_input();
+ current_char = peek_input();
+
+ if (current_char != '\'') {
+ rust_error_at(get_current_location(), "unclosed byte char");
+ }
+
+ // TODO: ensure skipping is needed here
+ skip_input();
+ current_char = peek_input();
+ length++; // go to next char
+ } else {
+ rust_error_at(get_current_location(), "no character inside '' for byte char");
+ }
+
+ current_column += length;
+
+ return Token::make_byte_char(loc, byte_char);
+ } else if (peek_input() == '"') {
+ // byte string
+
+ // skip quote character
+ skip_input();
+
+ std::string str;
+ str.reserve(16); // some sensible default
+
+ int length = 1;
+ current_char = peek_input();
+ // TODO: handle escapes properly
+
+ while (current_char != '"' && current_char != '\n') {
+ if (current_char == '\\') {
+ char output_char = 0;
+ parse_escape(length, output_char, '"');
+
+ if (output_char > 127) {
+ rust_error_at(get_current_location(),
+ "char '%c' in byte string out of range", output_char);
+ output_char = 0;
+ }
+
+ str += output_char;
+
+ continue;
+ }
+
+ length++;
+
+ str += current_char;
+ skip_input();
+ current_char = peek_input();
+ }
+
+ current_column += length;
+
+ if (current_char == '\n') {
+ rust_error_at(get_current_location(), "unended byte string literal");
+ } else if (current_char == '"') {
+ skip_input();
+ current_char = peek_input();
+ } else {
+ rust_unreachable();
+ }
+
+ return Token::make_byte_string(loc, str);
+ // TODO: ensure escapes and string continue work properly
+ } else if (peek_input() == 'r' && (peek_input(1) == '#' || peek_input(1) == '"')) {
+ // raw byte string literals
+ std::string str;
+ str.reserve(16); // some sensible default
+
+ int length = 1;
+ int hash_count = 0;
+
+ // get hash count at beginnning
+ skip_input();
+ current_char = peek_input();
+ while (current_char == '#') {
+ hash_count++;
+ length++;
+
+ skip_input();
+ current_char = peek_input();
+ }
+
+ if (current_char != '"') {
+ rust_error_at(get_current_location(), "raw byte string has no opening '\"'");
+ }
+
+ skip_input();
+ current_char = peek_input();
+
+ while (true) {
+ if (current_char == '"') {
+ bool enough_hashes = true;
+
+ for (int i = 0; i < hash_count; i++) {
+ if (peek_input(i + 1) != '#') {
+ enough_hashes
+ = false; // could continue here - improve performance
+ }
+ }
+
+ if (enough_hashes) {
+ // skip enough input and peek enough input
+ skip_input(hash_count); // is this enough?
+ current_char = peek_input();
+ length += hash_count + 1;
+ break;
+ }
+ }
+
+ length++;
+
+ str += current_char;
+ skip_input();
+ current_char = peek_input();
+ }
+
+ current_column += length;
+
+ return Token::make_byte_string(loc, str); // TODO: does this work properly
+ }
+ }
+
+ // raw stuff
+ if (current_char == 'r') {
+ int peek = peek_input();
+ int peek1 = peek_input(1);
+
+ if (peek == '#' && (ISALPHA(peek1) || peek1 == '_')) {
+ // raw identifier
+ std::string str;
+ str.reserve(16); // default
+
+ skip_input();
+ current_char = peek_input();
+
+ current_column += 2;
+
+ str += current_char;
+
+ bool first_is_underscore = current_char == '_';
+
+ int length = 1;
+ current_char = peek_input();
+ // loop through entire name
+ while (ISALPHA(current_char) || ISDIGIT(current_char) || current_char == '_') {
+ length++;
+
+ str += current_char;
+ skip_input();
+ current_char = peek_input();
+ }
+
+ current_column += length;
+
+ // if just a single underscore, not an identifier
+ if (first_is_underscore && length == 1) {
+ rust_error_at(get_current_location(), "'_' is not a valid raw identifier");
+ }
+
+ if (str == "crate" || str == "extern" || str == "self" || str == "super"
+ || str == "Self") {
+ rust_error_at(
+ get_current_location(), "'%s' is a forbidden raw identifier", str.c_str());
+ } else {
+ return Token::make_identifier(loc, str);
+ }
+ } else if (peek == '"' || (peek == '#' && (ISALPHA(peek1) || peek1 == '_'))) {
+ // raw string literals
+ std::string str;
+ str.reserve(16); // some sensible default
+
+ int length = 1;
+ int hash_count = 0;
+
+ // get hash count at beginnning
+ current_char = peek;
+ while (current_char == '#') {
+ hash_count++;
+ length++;
+
+ skip_input();
+ current_char = peek_input();
+ }
+
+ if (current_char != '"') {
+ rust_error_at(get_current_location(), "raw string has no opening '\"'");
+ }
+
+ skip_input();
+ Codepoint current_char32 = test_peek_codepoint_input();
+
+ while (true) {
+ if (current_char32.value == '"') {
+ bool enough_hashes = true;
+
+ for (int i = 0; i < hash_count; i++) {
+ // if (test_peek_codepoint_input(i + 1) != '#') {
+ // TODO: ensure this is a good enough replacement
+ if (peek_input(i + 1) != '#') {
+ enough_hashes
+ = false; // could continue here - improve performance
+ }
+ }
+
+ if (enough_hashes) {
+ // skip enough input and peek enough input
+ skip_input(hash_count); // is this enough?
+ current_char = peek_input();
+ length += hash_count + 1;
+ break;
+ }
+ }
+
+ length++;
+
+ str += current_char32;
+ test_skip_codepoint_input();
+ current_char32 = test_peek_codepoint_input();
+ }
+
+ current_column += length;
+
+ return Token::make_string(loc, str); // TODO: does this work properly
+ }
+ }
+
+ // find identifiers and keywords
+ if (ISALPHA(current_char) || current_char == '_') {
+ std::string str;
+ str.reserve(16); // default
+ str += current_char;
+
+ bool first_is_underscore = current_char == '_';
+
+ int length = 1;
+ current_char = peek_input();
+ // loop through entire name
+ while (ISALPHA(current_char) || ISDIGIT(current_char) || current_char == '_') {
+ length++;
+
+ str += current_char;
+ skip_input();
+ current_char = peek_input();
+ }
+
+ current_column += length;
+
+ // if just a single underscore, not an identifier
+ if (first_is_underscore && length == 1) {
+ return Token::make(UNDERSCORE, loc);
+ }
+
+ TokenId keyword = classify_keyword(str);
+ if (keyword == IDENTIFIER) {
+ return Token::make_identifier(loc, str);
+ } else {
+ return Token::make(keyword, loc);
+ }
+ }
+
+ // identify literals
+ // int or float literals - not processed properly
+ if (ISDIGIT(current_char) || current_char == '.') { // _ not allowed as first char
+ std::string str;
+ str.reserve(16); // some sensible default
+ str += current_char;
+
+ PrimitiveCoreType type_hint = CORETYPE_UNKNOWN;
+
+ bool is_real = (current_char == '.');
+
+ int length = 1;
+
+ // handle binary, octal, hex literals
+ if (current_char == '0' && !ISDIGIT(peek_input())) {
+ current_char = peek_input();
+
+ if (current_char == 'x') {
+ // hex (integer only)
+
+ skip_input();
+ current_char = peek_input();
+
+ length++;
+
+ // add 'x' to string after 0 so it is 0xFFAA or whatever
+ str += 'x';
+
+ // loop through to add entire hex number to string
+ while (is_x_digit(current_char) || current_char == '_') {
+ if (current_char == '_') {
+ // don't add _ to number
+ skip_input();
+ current_char = peek_input();
+
+ length++;
+
+ continue;
+ }
+
+ length++;
+
+ // add raw hex numbers
+ str += current_char;
+ skip_input();
+ current_char = peek_input();
+ }
+
+ current_column += length;
+
+ // convert hex value to decimal representation
+ long hex_num = ::std::strtol(str.c_str(), NULL, 16);
+
+ // create output string stream for hex value to be converted to string
+ // again
+ // TODO: if too slow, use sprintf
+ ::std::ostringstream ostr;
+ ostr << hex_num;
+
+ // reassign string representation to converted value
+ str = ostr.str();
+
+ // parse in type suffix if it exists
+ parse_in_type_suffix(/*current_char, */ type_hint, length);
+
+ if (type_hint == CORETYPE_F32 || type_hint == CORETYPE_F64) {
+ rust_error_at(get_current_location(),
+ "invalid type suffix '%s' for integer (hex) literal",
+ get_type_hint_string(type_hint));
+ }
+ } else if (current_char == 'o') {
+ // octal (integer only)
+
+ skip_input();
+ current_char = peek_input();
+
+ length++;
+
+ // don't add any characters as C octals are just 0124 or whatever
+
+ // loop through to add entire octal number to string
+ while (is_octal_digit(current_char) || current_char == '_') {
+ if (current_char == '_') {
+ // don't add _ to number
+ skip_input();
+ current_char = peek_input();
+
+ length++;
+
+ continue;
+ }
+
+ length++;
+
+ // add raw octal numbers
+ str += current_char;
+ skip_input();
+ current_char = peek_input();
+ }
+
+ current_column += length;
+
+ // convert octal value to decimal representation
+ long octal_num = ::std::strtol(str.c_str(), NULL, 8);
+
+ // create output string stream for octal value to be converted to
+ // string again
+ // TODO: if too slow, use sprintf
+ ::std::ostringstream ostr;
+ ostr << octal_num;
+
+ // reassign string representation to converted value
+ str = ostr.str();
+
+ // parse in type suffix if it exists
+ parse_in_type_suffix(/*current_char, */ type_hint, length);
+
+ if (type_hint == CORETYPE_F32 || type_hint == CORETYPE_F64) {
+ rust_error_at(get_current_location(),
+ "invalid type suffix '%s' for integer (octal) literal",
+ get_type_hint_string(type_hint));
+ }
+ } else if (current_char == 'b') {
+ // binary (integer only)
+
+ skip_input();
+ current_char = peek_input();
+
+ length++;
+
+ // don't add any characters as C binary numbers are not really
+ // supported
+
+ // loop through to add entire binary number to string
+ while (is_bin_digit(current_char) || current_char == '_') {
+ if (current_char == '_') {
+ // don't add _ to number
+ skip_input();
+ current_char = peek_input();
+
+ length++;
+
+ continue;
+ }
+
+ length++;
+
+ // add raw binary numbers
+ str += current_char;
+ skip_input();
+ current_char = peek_input();
+ }
+
+ current_column += length;
+
+ // convert binary value to decimal representation
+ long bin_num = ::std::strtol(str.c_str(), NULL, 2);
+
+ // create output string stream for binary value to be converted to
+ // string again
+ // TODO: if too slow, use sprintf
+ ::std::ostringstream ostr;
+ ostr << bin_num;
+
+ // reassign string representation to converted value
+ str = ostr.str();
+
+ // parse in type suffix if it exists
+ parse_in_type_suffix(/*current_char, */ type_hint, length);
+
+ if (type_hint == CORETYPE_F32 || type_hint == CORETYPE_F64) {
+ rust_error_at(get_current_location(),
+ "invalid type suffix '%s' for integer (binary) literal",
+ get_type_hint_string(type_hint));
+ }
+ }
+ } else {
+ // handle decimals (integer or float)
+
+ current_char = peek_input();
+
+ // parse initial decimal literal - assuming integer
+ // TODO: test if works
+ parse_in_decimal(/*current_char, */ str, length);
+
+ // detect float literal - TODO: fix: "242." is not recognised as a float literal
+ if (current_char == '.' && is_float_digit(peek_input(1))) {
+ // float with a '.', parse another decimal into it
+
+ is_real = true;
+
+ // add . to str
+ str += current_char;
+ skip_input();
+ current_char = peek_input();
+
+ length++;
+
+ // parse another decimal number for float
+ // TODO: test if works
+ parse_in_decimal(/*current_char, */ str, length);
+
+ // parse in exponent part if it exists
+ // test to see if this works:
+ parse_in_exponent_part(/*current_char, */ str, length);
+
+ // parse in type suffix if it exists
+ // TODO: see if works:
+ parse_in_type_suffix(/*current_char, */ type_hint, length);
+
+ if (type_hint != CORETYPE_F32 && type_hint != CORETYPE_F64
+ && type_hint != CORETYPE_UNKNOWN) {
+ rust_error_at(get_current_location(),
+ "invalid type suffix '%s' for float literal",
+ get_type_hint_string(type_hint));
+ }
+
+ } else if (current_char == '.' && check_valid_float_dot_end(peek_input(1))) {
+ is_real = true;
+
+ // add . to str
+ str += current_char;
+ skip_input();
+ current_char = peek_input();
+ length++;
+
+ // add a '0' after the . to stop ambiguity
+ str += '0';
+
+ // don't parse another decimal number for float
+
+ // parse in exponent part if it exists - shouldn't exist?
+ // parse_in_exponent_part(/*current_char, */ str, length);
+
+ // parse in type suffix if it exists - shouldn't exist?
+ // TODO: see if works:
+ // parse_in_type_suffix(/*current_char, */ type_hint, length);
+
+ if (type_hint != CORETYPE_F32 && type_hint != CORETYPE_F64
+ && type_hint != CORETYPE_UNKNOWN) {
+ rust_error_at(get_current_location(),
+ "invalid type suffix '%s' for float literal",
+ get_type_hint_string(type_hint));
+ }
+ } else if (current_char == 'E' || current_char == 'e') {
+ is_real = true;
+
+ // parse exponent part
+ parse_in_exponent_part(/*current_char, */ str, length);
+
+ // parse in type suffix if it exists
+ parse_in_type_suffix(/*current_char, */ type_hint, length);
+
+ if (type_hint != CORETYPE_F32 && type_hint != CORETYPE_F64
+ && type_hint != CORETYPE_UNKNOWN) {
+ rust_error_at(get_current_location(),
+ "invalid type suffix '%s' for float literal",
+ get_type_hint_string(type_hint));
+ }
+ } else {
+ // is an integer
+
+ // parse in type suffix if it exists
+ parse_in_type_suffix(/*current_char, */ type_hint, length);
+
+ if (type_hint == CORETYPE_F32 || type_hint == CORETYPE_F64) {
+ rust_error_at(get_current_location(),
+ "invalid type suffix '%s' for integer (decimal) literal",
+ get_type_hint_string(type_hint));
+ }
+ }
+
+ current_column += length;
+ }
+
+ // actually make the tokens
+ if (is_real) {
+ return Token::make_float(loc, str, type_hint);
+ } else {
+ return Token::make_int(loc, str, type_hint);
+ }
+ }
+
+ // string literals - not processed properly
+ if (current_char == '"') {
+ Codepoint current_char32;
+
+ std::string str;
+ str.reserve(16); // some sensible default
+
+ int length = 1;
+ current_char32 = test_peek_codepoint_input();
+
+ // ok initial peek_codepoint seems to work without "too long"
+
+ while (current_char32.value != '\n' && current_char32.value != '"') {
+ // TODO: handle escapes and string continue
+ if (current_char32.value == '\\') {
+ // parse escape
+ parse_utf8_escape(length, current_char32, '\'');
+
+ // TODO: find a way to parse additional characters after the escape?
+ // return after parsing escape?
+
+ str += current_char32;
+
+ // required as parsing utf8 escape only changes current_char or something
+ current_char32 = test_peek_codepoint_input();
+
+ continue;
+ }
+
+ length += test_get_input_codepoint_length();
+
+ // does this work? not technically a char. maybe have to convert to char series
+ str += current_char32;
+ test_skip_codepoint_input();
+ current_char32 = test_peek_codepoint_input();
+ }
+
+ current_column += length;
+
+ if (current_char32.value == '\n') {
+ rust_error_at(get_current_location(), "unended string literal");
+ } else if (current_char32.value == '"') {
+ skip_input();
+
+ current_char = peek_input();
+ } else {
+ rust_unreachable();
+ }
+
+ return Token::make_string(loc, str);
+ // TODO: account for escapes and string continue
+ // also, in rust a string is a series of unicode characters (4 bytes)
+ }
+
+ // char literal attempt
+ if (current_char == '\'') {
+ // rust chars are 4 bytes and have some weird unicode representation thing
+ Codepoint current_char32;
+
+ int length = 1;
+
+ current_char32 = test_peek_codepoint_input();
+
+ // parse escaped char literal
+ if (current_char32.value == '\\') {
+ // parse escape
+ parse_utf8_escape(length, current_char32, '\'');
+
+ // TODO - this skip may not be needed?
+ // test_skip_codepoint_input();
+
+ if (test_peek_codepoint_input().value != '\'') {
+ rust_error_at(get_current_location(), "unended char literal");
+ } else {
+ test_skip_codepoint_input();
+ current_char = peek_input();
+ length++;
+ }
+
+ current_column += length;
+
+ // TODO: FIX - char is actually 4 bytes in Rust (uint32) due to unicode
+ return Token::make_char(loc, current_char32);
+ } else {
+ // current_char32 = test_peek_codepoint_input();
+ test_skip_codepoint_input();
+
+ if (test_peek_codepoint_input().value == '\'') {
+ // parse normal char literal
+ // TODO: FIX - char is actually 4 bytes in Rust (uint32) due to unicode
+
+ // skip the ' character
+ skip_input();
+ current_char = peek_input();
+
+ // TODO fix due to different widths of utf-8 chars
+ current_column += 3;
+
+ return Token::make_char(loc, current_char32);
+ } else if (ISDIGIT(current_char32.value) || ISALPHA(current_char32.value)
+ || current_char32.value == '_') {
+ // parse lifetime name
+ ::std::string str;
+ // TODO: does this work properly?
+ str += current_char32;
+
+ // TODO: fix lifetime name thing - actually, why am I even using utf-8 here?
+
+ int length = 1;
+
+ current_char32 = test_peek_codepoint_input();
+
+ while (ISDIGIT(current_char32.value) || ISALPHA(current_char32.value)
+ || current_char32.value == '_') {
+ length += test_get_input_codepoint_length();
+
+ str += current_char32;
+ test_skip_codepoint_input();
+ current_char32 = test_peek_codepoint_input();
+ }
+
+ current_column += length;
+
+ return Token::make_lifetime(loc, str);
+ } else {
+ rust_error_at(get_current_location(), "expected ' after character constant");
+ }
+ }
+ }
+
+ // didn't match anything so error
+ rust_error_at(loc, "unexpected character '%x'", current_char);
+ current_column++;
+ }
}
- else
- {
- rust_error_at (get_current_location (), "unknown number suffix '%s'",
- suffix.c_str ());
- return false;
+ // Reads an optional literal type suffix (such as "u8" or "f64") starting at the
+ // current character. Underscores are consumed but never stored. When the
+ // collected suffix is recognised, type_hint is overwritten and true is returned.
+ // With no suffix at all, or with an unknown one (which is also reported as an
+ // error), type_hint is left untouched and false is returned. length is
+ // incremented once for every character consumed.
+ bool Lexer::parse_in_type_suffix(
+ /*char& current_char, */ PrimitiveCoreType& type_hint, int& length) {
+ ::std::string suffix;
+ suffix.reserve(5);
+
+ // gather the suffix characters
+ for (;;) {
+ const bool is_underscore = (current_char == '_');
+ if (!is_underscore && !ISALPHA(current_char) && !ISDIGIT(current_char))
+ break;
+
+ // underscores are stepped over rather than recorded
+ if (!is_underscore)
+ suffix += current_char;
+
+ length++;
+ skip_input();
+ current_char = peek_input();
+ }
+
+ if (suffix.empty()) {
+ // no type suffix: nothing to do, and not an error either
+ return false;
+ }
+
+ // every recognised suffix together with the core type it selects
+ static const struct {
+ const char* name;
+ PrimitiveCoreType type;
+ } known_suffixes[] = {
+ { "f32", CORETYPE_F32 },
+ { "f64", CORETYPE_F64 },
+ { "i8", CORETYPE_I8 },
+ { "i16", CORETYPE_I16 },
+ { "i32", CORETYPE_I32 },
+ { "i64", CORETYPE_I64 },
+ { "i128", CORETYPE_I128 },
+ { "isize", CORETYPE_ISIZE },
+ { "u8", CORETYPE_U8 },
+ { "u16", CORETYPE_U16 },
+ { "u32", CORETYPE_U32 },
+ { "u64", CORETYPE_U64 },
+ { "u128", CORETYPE_U128 },
+ { "usize", CORETYPE_USIZE },
+ };
+
+ for (const auto& entry : known_suffixes) {
+ if (suffix == entry.name) {
+ type_hint = entry.type;
+ return true;
+ }
+ }
+
+ rust_error_at(get_current_location(), "unknown number suffix '%s'", suffix.c_str());
+
+ return false;
+ }
- return true;
-}
+ // Appends an optional exponent part ("e"/"E", an optional sign, then digits) to
+ // str. Does nothing unless the current character is 'E' or 'e'. A '-' sign is
+ // copied into str; a '+' sign is consumed but not copied. The digits are read by
+ // parse_in_decimal. length is incremented once for every character consumed
+ // here.
+ void Lexer::parse_in_exponent_part(/*char& current_char, */ std::string& str, int& length) {
+ if (current_char == 'E' || current_char == 'e') {
+ // add exponent to string as strtod works with it
+ str += current_char;
+ skip_input();
+ current_char = peek_input();
+
+ length++;
+
+ // special - and + handling
+ if (current_char == '-') {
+ str += '-';
+
+ skip_input();
+ current_char = peek_input();
+
+ length++;
+ } else if (current_char == '+') {
+ // don't add + but still skip input
+ skip_input();
+ current_char = peek_input();
+
+ length++;
+ }
-void
-Lexer::parse_in_exponent_part (/*char& current_char, */ std::string &str,
- int &length)
-{
- if (current_char == 'E' || current_char == 'e')
- {
- // add exponent to string as strtod works with it
- str += current_char;
- skip_input ();
- current_char = peek_input ();
-
- length++;
-
- // special - and + handling
- if (current_char == '-')
- {
- str += '-';
-
- skip_input ();
- current_char = peek_input ();
-
- length++;
- }
- else if (current_char == '+')
- {
- // don't add + but still skip input
- skip_input ();
- current_char = peek_input ();
-
- length++;
- }
-
- // parse another decimal number for exponent
- parse_in_decimal (/*current_char, */ str, length);
+ // parse another decimal number for exponent
+ // NOTE(review): nothing here checks that at least one digit followed the
+ // exponent marker - confirm whether a caller validates that
+ parse_in_decimal(/*current_char, */ str, length);
+ }
+ }
-}
-void
-Lexer::parse_in_decimal (/*char& current_char, */ std::string &str, int &length)
-{
- while (ISDIGIT (current_char) || current_char == '_')
- {
- if (current_char == '_')
- {
- // don't add _ to number
- skip_input ();
- current_char = peek_input ();
+ // Consumes a run of decimal digits and underscores starting at the current
+ // character. Digits are appended to str; underscores are consumed but dropped.
+ // length is incremented once per consumed character. Stops at the first
+ // character that is neither a digit nor '_', leaving it in current_char.
+ void Lexer::parse_in_decimal(/*char& current_char, */ std::string& str, int& length) {
+ while (ISDIGIT(current_char) || current_char == '_') {
+ if (current_char == '_') {
+ // don't add _ to number
+ skip_input();
+ current_char = peek_input();
- length++;
+ length++;
- continue;
- }
+ continue;
+ }
- length++;
+ length++;
- str += current_char;
- skip_input ();
- current_char = peek_input ();
+ str += current_char;
+ skip_input();
+ current_char = peek_input();
+ }
+ }
-}
-// Replace all assorted parse_x_escape with this? Avoids the backwards/peek
-// issue.
-bool
-Lexer::parse_escape (int &length, char &output_char, char opening_char)
-{
- // skip to actual letter
- skip_input ();
- current_char = peek_input ();
- length++;
-
- switch (current_char)
- {
- case 'x':
- {
- // hex char string (null-terminated)
- char hexNum[3] = {0, 0, 0};
-
- // first hex char
- skip_input ();
- current_char = peek_input ();
- length++;
-
- if (!ISXDIGIT (current_char))
- {
- rust_error_at (get_current_location (),
- "invalid character '\\x%c' in \\x sequence",
- current_char);
- }
- hexNum[0] = current_char;
-
- // second hex char
- skip_input ();
- current_char = peek_input ();
- length++;
-
- if (!ISXDIGIT (current_char))
- {
- rust_error_at (get_current_location (),
- "invalid character '\\x%c' in \\x sequence",
- current_char);
- }
- hexNum[1] = current_char;
-
- long hexLong = ::std::strtol (hexNum, NULL, 16);
-
- if (hexLong > 127)
- rust_error_at (
- get_current_location (),
- "ascii \\x escape '\\x%s' out of range - allows up to '\\x7F'",
- hexNum);
- // gcc_assert(hexLong < 128); // as ascii
- char hexChar = static_cast<char> (hexLong);
-
- // TODO: fix - does this actually give the right character?
- output_char = hexChar;
- }
- break;
- case 'n':
- output_char = '\n';
- break;
- case 'r':
- output_char = '\r';
- break;
- case 't':
- output_char = '\t';
- break;
- case '\\':
- output_char = '\\';
- break;
- case '0':
- output_char = '\0';
- break;
- case '\'':
- output_char = '\'';
- break;
- case '"':
- output_char = '"';
- break;
- case 'u':
- {
- // TODO: shouldn't be used with this - use parse_utf8_escape
-
- skip_input ();
- current_char = peek_input ();
- length++;
-
- bool need_close_brace = false;
-
- // TODO: rustc lexer doesn't seem to allow not having { but mrustc lexer
- // does? look at spec?
- if (current_char == '{')
- {
- need_close_brace = true;
-
- skip_input ();
- current_char = peek_input ();
- length++;
- }
-
- // parse unicode escape
- // 1-6 hex digits?
- ::std::string num_str;
- num_str.reserve (6);
-
- // test adding number directly
- uint32_t test_val;
-
- // loop through to add entire hex number to string
- while (is_x_digit (current_char) || current_char == '_')
- {
- if (current_char == '_')
- {
- // don't add _ to number
- skip_input ();
- current_char = peek_input ();
-
- length++;
-
- continue;
- }
-
- length++;
-
- // add raw hex numbers
- num_str += current_char;
-
- // test adding number directly
- char tmp[2] = {current_char, 0};
- test_val *= 16;
- test_val += ::std::strtol (tmp, NULL, 16);
-
- skip_input ();
- current_char = peek_input ();
- }
-
- // ensure closing brace
- if (need_close_brace && current_char != '}')
- {
- // actually an error
- rust_error_at (get_current_location (),
- "expected terminating '}' in unicode escape");
- return false;
- }
-
- // ensure 1-6 hex characters
- if (num_str.length () > 6 || num_str.length () < 1)
- {
- rust_error_at (get_current_location (),
- "unicode escape should be between 1 and 6 hex "
- "characters; it is %lu",
- num_str.length ());
- return false;
- }
-
- long hex_num = ::std::strtol (num_str.c_str (), NULL, 16);
-
- // as debug, check hex_num = test_val
- if (hex_num > 255)
- {
- rust_error_at (
- get_current_location (),
- "non-ascii chars not implemented yet, defaulting to 0");
- hex_num = 0;
- }
-
- // make output_char the value - UTF-8?
- // TODO: actually make this work - output char must be 4 bytes, do I
- // need a string for this?
- output_char = static_cast</*uint32_t*/ char> (hex_num);
-
- return true;
- }
- break;
- case '\r':
- case '\n':
- // string continue
- while (is_whitespace (current_char))
- {
- if (current_char == '\n')
- {
- current_line++;
- current_column = 1;
- // tell line_table that new line starts
- linemap_line_start (::line_table, current_line, max_column_hint);
-
- // reset "length"
- length = 1;
-
- // get next char
- skip_input ();
- current_char = peek_input ();
-
- continue;
- }
-
- skip_input ();
- current_char = peek_input ();
- length++;
- }
-
- if (current_char == '\\')
- {
- parse_escape (length, output_char, opening_char);
- return true;
- }
- else if (current_char == opening_char)
- {
- // TODO: does this skip the ' or " character? It shouldn't.
- output_char = 0;
- return true;
- }
- else
- {
- output_char = current_char;
-
- // TODO: test has right result
- /*skip_input();
- current_char = peek_input();*/
-
- return true;
- }
- default:
- rust_error_at (get_current_location (), "unknown escape sequence '\\%c'",
- current_char);
- // returns false if no parsing could be done
- return false;
- break;
+ // Parses one escape sequence and stores the decoded character in output_char.
+ // The first step skips a single input character to reach the escape letter, so
+ // the call is presumably made with the backslash as the current character.
+ //
+ // length is incremented for each character consumed, and reset to 1 when a
+ // string continuation crosses a newline. opening_char is the quote delimiting
+ // the enclosing literal; it is only consulted after a string continuation.
+ //
+ // Returns false for an unknown escape letter or a malformed \u escape, and true
+ // otherwise. Errors inside a \x escape are reported but do not change the
+ // return value.
+ //
+ // Replace all assorted parse_x_escape with this? Avoids the backwards/peek issue.
+ bool Lexer::parse_escape(int& length, char& output_char, char opening_char) {
+ // skip to actual letter
+ skip_input();
+ current_char = peek_input();
+ length++;
+
+ switch (current_char) {
+ case 'x': {
+ // hex char string (null-terminated)
+ char hexNum[3] = { 0, 0, 0 };
+
+ // first hex char
+ skip_input();
+ current_char = peek_input();
+ length++;
+
+ if (!ISXDIGIT(current_char)) {
+ rust_error_at(get_current_location(), "invalid character '\\x%c' in \\x sequence",
+ current_char);
+ }
+ hexNum[0] = current_char;
+
+ // second hex char
+ skip_input();
+ current_char = peek_input();
+ length++;
+
+ if (!ISXDIGIT(current_char)) {
+ rust_error_at(get_current_location(), "invalid character '\\x%c' in \\x sequence",
+ current_char);
+ }
+ hexNum[1] = current_char;
+
+ long hexLong = ::std::strtol(hexNum, NULL, 16);
+
+ if (hexLong > 127)
+ rust_error_at(get_current_location(),
+ "ascii \\x escape '\\x%s' out of range - allows up to '\\x7F'", hexNum);
+ // gcc_assert(hexLong < 128); // as ascii
+ char hexChar = static_cast<char>(hexLong);
+
+ // TODO: fix - does this actually give the right character?
+ output_char = hexChar;
+ } break;
+ case 'n':
+ output_char = '\n';
+ break;
+ case 'r':
+ output_char = '\r';
+ break;
+ case 't':
+ output_char = '\t';
+ break;
+ case '\\':
+ output_char = '\\';
+ break;
+ case '0':
+ output_char = '\0';
+ break;
+ case '\'':
+ output_char = '\'';
+ break;
+ case '"':
+ output_char = '"';
+ break;
+ case 'u': {
+ // TODO: shouldn't be used with this - use parse_utf8_escape
+
+ skip_input();
+ current_char = peek_input();
+ length++;
+
+ bool need_close_brace = false;
+
+ // TODO: rustc lexer doesn't seem to allow not having { but mrustc lexer does? look at
+ // spec?
+ if (current_char == '{') {
+ need_close_brace = true;
+
+ skip_input();
+ current_char = peek_input();
+ length++;
+ }
+
+ // parse unicode escape
+ // 1-6 hex digits?
+ ::std::string num_str;
+ num_str.reserve(6);
+
+ // test adding number directly
+ // (starts at zero: it is multiplied and added to in the loop below, so
+ // leaving it uninitialised would read an indeterminate value)
+ uint32_t test_val = 0;
+
+ // loop through to add entire hex number to string
+ while (is_x_digit(current_char) || current_char == '_') {
+ if (current_char == '_') {
+ // don't add _ to number
+ skip_input();
+ current_char = peek_input();
+
+ length++;
+
+ continue;
+ }
+
+ length++;
+
+ // add raw hex numbers
+ num_str += current_char;
+
+ // test adding number directly
+ char tmp[2] = { current_char, 0 };
+ test_val *= 16;
+ test_val += ::std::strtol(tmp, NULL, 16);
+
+ skip_input();
+ current_char = peek_input();
+ }
+
+ // ensure closing brace
+ // NOTE(review): the '}' itself is not consumed here, whereas
+ // parse_utf8_escape does consume it - confirm which is intended
+ if (need_close_brace && current_char != '}') {
+ // actually an error
+ rust_error_at(
+ get_current_location(), "expected terminating '}' in unicode escape");
+ return false;
+ }
+
+ // ensure 1-6 hex characters
+ if (num_str.length() > 6 || num_str.length() < 1) {
+ // %lu expects unsigned long, which size_t is not on every host
+ rust_error_at(get_current_location(),
+ "unicode escape should be between 1 and 6 hex characters; it is %lu",
+ static_cast<unsigned long>(num_str.length()));
+ return false;
+ }
+
+ long hex_num = ::std::strtol(num_str.c_str(), NULL, 16);
+
+ // as debug, check hex_num = test_val
+ if (hex_num > 255) {
+ rust_error_at(
+ get_current_location(), "non-ascii chars not implemented yet, defaulting to 0");
+ hex_num = 0;
+ }
+
+ // make output_char the value - UTF-8?
+ // TODO: actually make this work - output char must be 4 bytes, do I need a string for
+ // this?
+ output_char = static_cast</*uint32_t*/ char>(hex_num);
+
+ return true;
+ } break;
+ case '\r':
+ case '\n':
+ // string continue
+ while (is_whitespace(current_char)) {
+ if (current_char == '\n') {
+ current_line++;
+ current_column = 1;
+ // tell line_table that new line starts
+ linemap_line_start(::line_table, current_line, max_column_hint);
+
+ // reset "length"
+ length = 1;
+
+ // get next char
+ skip_input();
+ current_char = peek_input();
+
+ continue;
+ }
+
+ skip_input();
+ current_char = peek_input();
+ length++;
+ }
+
+ if (current_char == '\\') {
+ // NOTE(review): the nested call's result is discarded, so a failed
+ // escape after a continuation still reports success
+ parse_escape(length, output_char, opening_char);
+ return true;
+ } else if (current_char == opening_char) {
+ // TODO: does this skip the ' or " character? It shouldn't.
+ output_char = 0;
+ return true;
+ } else {
+ output_char = current_char;
+
+ // TODO: test has right result
+ /*skip_input();
+ current_char = peek_input();*/
+
+ return true;
+ }
+ default:
+ rust_error_at(get_current_location(), "unknown escape sequence '\\%c'", current_char);
+ // returns false if no parsing could be done
+ return false;
+ break;
+ }
+ // all non-special cases (unicode, string continue) should skip their used char
+ skip_input();
+ current_char = peek_input();
+ length++;
+
+ // returns true if parsing was successful
+ return true;
+ }
- // all non-special cases (unicode, string continue) should skip their used
- // char
- skip_input ();
- current_char = peek_input ();
- length++;
-
- // returns true if parsing was successful
- return true;
-}
-bool
-Lexer::parse_utf8_escape (int &length, Codepoint &output_char,
- char opening_char)
-{
- // skip to actual letter
- skip_input ();
- current_char = peek_input ();
- length++;
-
- switch (current_char)
- {
- case 'x':
- {
- // hex char string (null-terminated)
- char hexNum[3] = {0, 0, 0};
-
- // first hex char
- skip_input ();
- current_char = peek_input ();
- length++;
-
- if (!ISXDIGIT (current_char))
- {
- rust_error_at (get_current_location (),
- "invalid character '\\x%c' in \\x sequence",
- current_char);
- }
- hexNum[0] = current_char;
-
- // second hex char
- skip_input ();
- current_char = peek_input ();
- length++;
-
- if (!ISXDIGIT (current_char))
- {
- rust_error_at (get_current_location (),
- "invalid character '\\x%c' in \\x sequence",
- current_char);
- }
- hexNum[1] = current_char;
-
- long hexLong = ::std::strtol (hexNum, NULL, 16);
-
- if (hexLong > 127)
- rust_error_at (
- get_current_location (),
- "ascii \\x escape '\\x%s' out of range - allows up to '\\x7F'",
- hexNum);
- // gcc_assert(hexLong < 128); // as ascii
- char hexChar = static_cast<char> (hexLong);
-
- // TODO: fix - does this actually give the right character?
- output_char = hexChar;
- }
- break;
- case 'n':
- output_char = '\n';
- break;
- case 'r':
- output_char = '\r';
- break;
- case 't':
- output_char = '\t';
- break;
- case '\\':
- output_char = '\\';
- break;
- case '0':
- output_char = '\0';
- break;
- case '\'':
- output_char = '\'';
- break;
- case '"':
- output_char = '"';
- break;
- case 'u':
- {
- skip_input ();
- current_char = peek_input ();
- length++;
-
- bool need_close_brace = false;
-
- // TODO: rustc lexer doesn't seem to allow not having { but mrustc lexer
- // does? look at spec?
- if (current_char == '{')
- {
- need_close_brace = true;
-
- skip_input ();
- current_char = peek_input ();
- length++;
- }
-
- // parse unicode escape
- // 1-6 hex digits?
- ::std::string num_str;
- num_str.reserve (6);
-
- // test adding number directly
- uint32_t test_val;
-
- // loop through to add entire hex number to string
- while (is_x_digit (current_char) || current_char == '_')
- {
- if (current_char == '_')
- {
- // don't add _ to number
- skip_input ();
- current_char = peek_input ();
-
- length++;
-
- continue;
- }
-
- length++;
-
- // add raw hex numbers
- num_str += current_char;
-
- // test adding number directly
- char tmp[2] = {current_char, 0};
- test_val *= 16;
- test_val += ::std::strtol (tmp, NULL, 16);
-
- skip_input ();
- current_char = peek_input ();
- }
-
- // ensure closing brace if required
- if (need_close_brace)
- {
- if (current_char == '}')
- {
- skip_input ();
- current_char = peek_input ();
- length++;
- }
- else
- {
- // actually an error
- rust_error_at (get_current_location (),
- "expected terminating '}' in unicode escape");
- return false;
- }
- }
-
- // ensure 1-6 hex characters
- if (num_str.length () > 6 || num_str.length () < 1)
- {
- rust_error_at (get_current_location (),
- "unicode escape should be between 1 and 6 hex "
- "characters; it is %lu",
- num_str.length ());
- return false;
- }
-
- long hex_num = ::std::strtol (num_str.c_str (), NULL, 16);
-
- // assert fits a uint32_t
- gcc_assert (hex_num < 4294967296);
-
- // ok can't figure out how to just convert to codepoint or use "this" so
- // create new one
- output_char = Codepoint (static_cast<uint32_t> (hex_num));
-
- // TODO: what is being outputted? the escape code for the unicode char
- // (unicode number) or the character number?
-
- return true;
- }
- break;
- case '\r':
- case '\n':
- // string continue
- while (is_whitespace (current_char))
- {
- if (current_char == '\n')
- {
- current_line++;
- current_column = 1;
- // tell line_table that new line starts
- linemap_line_start (::line_table, current_line, max_column_hint);
-
- // reset "length"
- length = 1;
-
- // get next char
- skip_input ();
- current_char = peek_input ();
-
- continue;
- }
-
- skip_input ();
- current_char = peek_input ();
- length++;
- }
-
- if (current_char == '\\')
- {
- parse_utf8_escape (length, output_char, opening_char);
- return true;
- }
- else if (current_char == opening_char)
- {
- // TODO: does this skip the ' or " character? It shouldn't.
- output_char = 0;
- return true;
- }
- else
- {
- output_char = current_char;
-
- // TODO: test has right result
- /*skip_input();
- current_char = peek_input();*/
-
- return true;
- }
- default:
- rust_error_at (get_current_location (), "unknown escape sequence '\\%c'",
- current_char);
- // returns false if no parsing could be done
- return false;
- break;
+ // Parses one escape sequence and stores the decoded value in output_char as a
+ // Codepoint. The first step skips a single input character to reach the escape
+ // letter, so the call is presumably made with the backslash as the current
+ // character.
+ //
+ // length is incremented for each character consumed, and reset to 1 when a
+ // string continuation crosses a newline. opening_char is the quote delimiting
+ // the enclosing literal; it is only consulted after a string continuation.
+ //
+ // Returns false for an unknown escape letter or a malformed \u escape, and true
+ // otherwise. Errors inside a \x escape are reported but do not change the
+ // return value.
+ bool Lexer::parse_utf8_escape(int& length, Codepoint& output_char, char opening_char) {
+ // skip to actual letter
+ skip_input();
+ current_char = peek_input();
+ length++;
+
+ switch (current_char) {
+ case 'x': {
+ // hex char string (null-terminated)
+ char hexNum[3] = { 0, 0, 0 };
+
+ // first hex char
+ skip_input();
+ current_char = peek_input();
+ length++;
+
+ if (!ISXDIGIT(current_char)) {
+ rust_error_at(get_current_location(), "invalid character '\\x%c' in \\x sequence",
+ current_char);
+ }
+ hexNum[0] = current_char;
+
+ // second hex char
+ skip_input();
+ current_char = peek_input();
+ length++;
+
+ if (!ISXDIGIT(current_char)) {
+ rust_error_at(get_current_location(), "invalid character '\\x%c' in \\x sequence",
+ current_char);
+ }
+ hexNum[1] = current_char;
+
+ long hexLong = ::std::strtol(hexNum, NULL, 16);
+
+ if (hexLong > 127)
+ rust_error_at(get_current_location(),
+ "ascii \\x escape '\\x%s' out of range - allows up to '\\x7F'", hexNum);
+ // gcc_assert(hexLong < 128); // as ascii
+ char hexChar = static_cast<char>(hexLong);
+
+ // TODO: fix - does this actually give the right character?
+ output_char = hexChar;
+ } break;
+ case 'n':
+ output_char = '\n';
+ break;
+ case 'r':
+ output_char = '\r';
+ break;
+ case 't':
+ output_char = '\t';
+ break;
+ case '\\':
+ output_char = '\\';
+ break;
+ case '0':
+ output_char = '\0';
+ break;
+ case '\'':
+ output_char = '\'';
+ break;
+ case '"':
+ output_char = '"';
+ break;
+ case 'u': {
+ skip_input();
+ current_char = peek_input();
+ length++;
+
+ bool need_close_brace = false;
+
+ // TODO: rustc lexer doesn't seem to allow not having { but mrustc lexer does? look at
+ // spec?
+ if (current_char == '{') {
+ need_close_brace = true;
+
+ skip_input();
+ current_char = peek_input();
+ length++;
+ }
+
+ // parse unicode escape
+ // 1-6 hex digits?
+ ::std::string num_str;
+ num_str.reserve(6);
+
+ // test adding number directly
+ // (starts at zero: it is multiplied and added to in the loop below, so
+ // leaving it uninitialised would read an indeterminate value)
+ uint32_t test_val = 0;
+
+ // loop through to add entire hex number to string
+ while (is_x_digit(current_char) || current_char == '_') {
+ if (current_char == '_') {
+ // don't add _ to number
+ skip_input();
+ current_char = peek_input();
+
+ length++;
+
+ continue;
+ }
+
+ length++;
+
+ // add raw hex numbers
+ num_str += current_char;
+
+ // test adding number directly
+ char tmp[2] = { current_char, 0 };
+ test_val *= 16;
+ test_val += ::std::strtol(tmp, NULL, 16);
+
+ skip_input();
+ current_char = peek_input();
+ }
+
+ // ensure closing brace if required
+ if (need_close_brace) {
+ if (current_char == '}') {
+ skip_input();
+ current_char = peek_input();
+ length++;
+ } else {
+ // actually an error
+ rust_error_at(
+ get_current_location(), "expected terminating '}' in unicode escape");
+ return false;
+ }
+ }
+
+ // ensure 1-6 hex characters
+ if (num_str.length() > 6 || num_str.length() < 1) {
+ // %lu expects unsigned long, which size_t is not on every host
+ rust_error_at(get_current_location(),
+ "unicode escape should be between 1 and 6 hex characters; it is %lu",
+ static_cast<unsigned long>(num_str.length()));
+ return false;
+ }
+
+ long hex_num = ::std::strtol(num_str.c_str(), NULL, 16);
+
+ // assert fits a uint32_t
+ rust_assert(hex_num < 4294967296);
+
+ // ok can't figure out how to just convert to codepoint or use "this" so create new
+ // one
+ output_char = Codepoint(static_cast<uint32_t>(hex_num));
+
+ // TODO: what is being outputted? the escape code for the unicode char (unicode
+ // number) or the character number?
+
+ return true;
+ } break;
+ case '\r':
+ case '\n':
+ // string continue
+ while (is_whitespace(current_char)) {
+ if (current_char == '\n') {
+ current_line++;
+ current_column = 1;
+ // tell line_table that new line starts
+ linemap_line_start(::line_table, current_line, max_column_hint);
+
+ // reset "length"
+ length = 1;
+
+ // get next char
+ skip_input();
+ current_char = peek_input();
+
+ continue;
+ }
+
+ skip_input();
+ current_char = peek_input();
+ length++;
+ }
+
+ if (current_char == '\\') {
+ // NOTE(review): the nested call's result is discarded, so a failed
+ // escape after a continuation still reports success
+ parse_utf8_escape(length, output_char, opening_char);
+ return true;
+ } else if (current_char == opening_char) {
+ // TODO: does this skip the ' or " character? It shouldn't.
+ output_char = 0;
+ return true;
+ } else {
+ output_char = current_char;
+
+ // TODO: test has right result
+ /*skip_input();
+ current_char = peek_input();*/
+
+ return true;
+ }
+ default:
+ rust_error_at(get_current_location(), "unknown escape sequence '\\%c'", current_char);
+ // returns false if no parsing could be done
+ return false;
+ break;
+ }
+ // all non-special cases (unicode, string continue) should skip their used char
+ skip_input();
+ current_char = peek_input();
+ length++;
+
+ // returns true if parsing was successful
+ return true;
+ }
- // all non-special cases (unicode, string continue) should skip their used
- // char
- skip_input ();
- current_char = peek_input ();
- length++;
-
- // returns true if parsing was successful
- return true;
-}
#if 0
bool Lexer::parse_ascii_escape(/*char& current_char, */ int& length, char& output_char) {
@@ -2495,305 +2102,262 @@ Lexer::parse_utf8_escape (int &length, Codepoint &output_char,
}
#endif
-int
-Lexer::test_get_input_codepoint_length ()
-{
- uint8_t input = peek_input ();
-
- if (input < 128)
- {
- // ascii -- 1 byte
- // return input;
-
- return 1;
+ /* Returns the length in bytes (1-4) of the UTF-8 sequence that starts at the current input
+  * position, or 0 if the sequence is malformed (stray continuation byte, bad trailing byte,
+  * or a lead byte announcing more than 4 bytes - only that last case is reported as an error). */
+ int Lexer::test_get_input_codepoint_length() {
+     const uint8_t lead = peek_input();
+
+     // ascii -- 1 byte
+     if (lead < 128)
+         return 1;
+
+     // invalid (continuation; can't be first char)
+     if ((lead & 0xC0) == 0x80)
+         return 0;
+
+     // total sequence length announced by the lead byte
+     int announced;
+     if ((lead & 0xE0) == 0xC0) {
+         announced = 2;
+     } else if ((lead & 0xF0) == 0xE0) {
+         announced = 3;
+     } else if ((lead & 0xF8) == 0xF0) {
+         announced = 4;
+     } else {
+         rust_error_at(get_current_location(), "invalid UTF-8 (too long)");
+         return 0;
+     }
+
+     // each trailing byte must have the form 10xxxxxx; stop peeking at the first that doesn't
+     for (int offset = 1; offset < announced; offset++) {
+         const uint8_t trailing = peek_input(offset);
+         if ((trailing & 0xC0) != 0x80)
+             return 0;
+     }
+
+     return announced;
+ }
- else if ((input & 0xC0) == 0x80)
- {
- // invalid (continuation; can't be first char)
- // return 0xFFFE;
- return 0;
- }
- else if ((input & 0xE0) == 0xC0)
- {
- // 2 bytes
- uint8_t input2 = peek_input (1);
- if ((input2 & 0xC0) != 0x80)
- return 0;
- // return 0xFFFE;
-
- // uint32_t output = ((input & 0x1F) << 6) | ((input2 & 0x3F) << 0);
- // return output;
- return 2;
- }
- else if ((input & 0xF0) == 0xE0)
- {
- // 3 bytes
- uint8_t input2 = peek_input (1);
- if ((input2 & 0xC0) != 0x80)
- return 0;
- // return 0xFFFE;
-
- uint8_t input3 = peek_input (2);
- if ((input3 & 0xC0) != 0x80)
- return 0;
- // return 0xFFFE;
-
- /*uint32_t output
- = ((input & 0x0F) << 12) | ((input2 & 0x3F) << 6) | ((input3 & 0x3F) <<
- 0); return output;*/
- return 3;
- }
- else if ((input & 0xF8) == 0xF0)
- {
- // 4 bytes
- uint8_t input2 = peek_input (1);
- if ((input2 & 0xC0) != 0x80)
- return 0;
- // return 0xFFFE;
-
- uint8_t input3 = peek_input (2);
- if ((input3 & 0xC0) != 0x80)
- return 0;
- // return 0xFFFE;
-
- uint8_t input4 = peek_input (3);
- if ((input4 & 0xC0) != 0x80)
- return 0;
- // return 0xFFFE;
-
- /*uint32_t output = ((input & 0x07) << 18) | ((input2 & 0x3F) << 12)
- | ((input3 & 0x3F) << 6) | ((input4 & 0x3F) << 0);
- return output;*/
- return 4;
- }
- else
- {
- rust_error_at (get_current_location (), "invalid UTF-8 (too long)");
- return 0;
+ // TODO: rewrite lexing system to use utf-8 "codepoints" rather than bytes?
+ /* Decodes the UTF-8 sequence at the current input position using only peek_input. Returns
+  * 0xFFFE for anything malformed; only a lead byte announcing more than 4 bytes is also
+  * reported as an error. */
+ Codepoint Lexer::test_peek_codepoint_input() {
+     uint8_t lead = peek_input();
+
+     // ascii -- 1 byte
+     if (lead < 128)
+         return { lead };
+
+     // invalid (continuation; can't be first char)
+     if ((lead & 0xC0) == 0x80)
+         return { 0xFFFE };
+
+     // number of continuation bytes to read, and the payload bits carried by the lead byte
+     int trailing_bytes;
+     uint32_t decoded;
+     if ((lead & 0xE0) == 0xC0) {
+         // 2 bytes
+         trailing_bytes = 1;
+         decoded = lead & 0x1F;
+     } else if ((lead & 0xF0) == 0xE0) {
+         // 3 bytes
+         trailing_bytes = 2;
+         decoded = lead & 0x0F;
+     } else if ((lead & 0xF8) == 0xF0) {
+         // 4 bytes
+         trailing_bytes = 3;
+         decoded = lead & 0x07;
+     } else {
+         rust_error_at(get_current_location(), "invalid UTF-8 (too long)");
+         return { 0xFFFE };
+     }
+
+     // every continuation byte contributes its low 6 bits, most significant group first
+     for (int offset = 1; offset <= trailing_bytes; offset++) {
+         uint8_t continuation = peek_input(offset);
+         if ((continuation & 0xC0) != 0x80)
+             return { 0xFFFE };
+
+         decoded = (decoded << 6) | (continuation & 0x3F);
+     }
+
+     return { decoded };
+ }
-}
-// TODO: rewrite lexing system to use utf-8 "codepoints" rather than bytes?
-Codepoint
-Lexer::test_peek_codepoint_input ()
-{
- uint8_t input = peek_input ();
+ void Lexer::test_skip_codepoint_input() { // Skips input according to the byte length of the UTF-8 sequence at the current position.
+ int toSkip = test_get_input_codepoint_length(); // 1-4 for a well-formed sequence, 0 for a malformed one
+ rust_assert(toSkip >= 1); // a malformed sequence (length 0) trips this assertion
- if (input < 128)
- {
- // ascii -- 1 byte
- return {input};
- }
- else if ((input & 0xC0) == 0x80)
- {
- // invalid (continuation; can't be first char)
- return {0xFFFE};
- }
- else if ((input & 0xE0) == 0xC0)
- {
- // 2 bytes
- uint8_t input2 = peek_input (1);
- if ((input2 & 0xC0) != 0x80)
- return {0xFFFE};
-
- uint32_t output = ((input & 0x1F) << 6) | ((input2 & 0x3F) << 0);
- return {output};
- }
- else if ((input & 0xF0) == 0xE0)
- {
- // 3 bytes
- uint8_t input2 = peek_input (1);
- if ((input2 & 0xC0) != 0x80)
- return {0xFFFE};
-
- uint8_t input3 = peek_input (2);
- if ((input3 & 0xC0) != 0x80)
- return {0xFFFE};
-
- uint32_t output = ((input & 0x0F) << 12) | ((input2 & 0x3F) << 6)
- | ((input3 & 0x3F) << 0);
- return {output};
- }
- else if ((input & 0xF8) == 0xF0)
- {
- // 4 bytes
- uint8_t input2 = peek_input (1);
- if ((input2 & 0xC0) != 0x80)
- return {0xFFFE};
-
- uint8_t input3 = peek_input (2);
- if ((input3 & 0xC0) != 0x80)
- return {0xFFFE};
-
- uint8_t input4 = peek_input (3);
- if ((input4 & 0xC0) != 0x80)
- return {0xFFFE};
-
- uint32_t output = ((input & 0x07) << 18) | ((input2 & 0x3F) << 12)
- | ((input3 & 0x3F) << 6) | ((input4 & 0x3F) << 0);
- return {output};
+ skip_input(toSkip - 1); // NOTE(review): passes length - 1; presumably skip_input(n) consumes n + 1 bytes - confirm against skip_input
 }
- else
- {
- rust_error_at (get_current_location (), "invalid UTF-8 (too long)");
- return {0xFFFE};
- }
-}
-
-void
-Lexer::test_skip_codepoint_input ()
-{
- int toSkip = test_get_input_codepoint_length ();
- gcc_assert (toSkip >= 1);
-
- skip_input (toSkip - 1);
-}
-
-int
-Lexer::test_get_input_codepoint_n_length (int n_start_offset)
-{
- uint8_t input = peek_input (n_start_offset);
- if (input < 128)
- {
- // ascii -- 1 byte
- // return input;
- return 1;
- }
- else if ((input & 0xC0) == 0x80)
- {
- // invalid (continuation; can't be first char)
- // return 0xFFFE;
- return 0;
- }
- else if ((input & 0xE0) == 0xC0)
- {
- // 2 bytes
- uint8_t input2 = peek_input (n_start_offset + 1);
- if ((input2 & 0xC0) != 0x80)
- // return 0xFFFE;
- return 0;
-
- // uint32_t output = ((input & 0x1F) << 6) | ((input2 & 0x3F) << 0);
- // return output;
- return 2;
- }
- else if ((input & 0xF0) == 0xE0)
- {
- // 3 bytes
- uint8_t input2 = peek_input (n_start_offset + 1);
- if ((input2 & 0xC0) != 0x80)
- // return 0xFFFE;
- return 0;
-
- uint8_t input3 = peek_input (n_start_offset + 2);
- if ((input3 & 0xC0) != 0x80)
- // return 0xFFFE;
- return 0;
-
- /*uint32_t output
- = ((input & 0x0F) << 12) | ((input2 & 0x3F) << 6) | ((input3 & 0x3F) <<
- 0); return output;*/
- return 3;
- }
- else if ((input & 0xF8) == 0xF0)
- {
- // 4 bytes
- uint8_t input2 = peek_input (n_start_offset + 1);
- if ((input2 & 0xC0) != 0x80)
- // return 0xFFFE;
- return 0;
-
- uint8_t input3 = peek_input (n_start_offset + 2);
- if ((input3 & 0xC0) != 0x80)
- // return 0xFFFE;
- return 0;
-
- uint8_t input4 = peek_input (n_start_offset + 3);
- if ((input4 & 0xC0) != 0x80)
- // return 0xFFFE;
- return 0;
-
- /*uint32_t output = ((input & 0x07) << 18) | ((input2 & 0x3F) << 12)
- | ((input3 & 0x3F) << 6) | ((input4 & 0x3F) << 0);
- return output;*/
- return 4;
- }
- else
- {
- rust_error_at (get_current_location (), "invalid UTF-8 (too long)");
- return 0;
+ /* Same measurement as test_get_input_codepoint_length, but for the UTF-8 sequence whose lead
+  * byte sits n_start_offset bytes ahead of the current input position. Returns 1-4, or 0 if
+  * the sequence is malformed (only an over-long lead byte is reported as an error). */
+ int Lexer::test_get_input_codepoint_n_length(int n_start_offset) {
+     const uint8_t lead = peek_input(n_start_offset);
+
+     // ascii -- 1 byte
+     if (lead < 128)
+         return 1;
+
+     // invalid (continuation; can't be first char)
+     if ((lead & 0xC0) == 0x80)
+         return 0;
+
+     // total sequence length announced by the lead byte
+     int announced;
+     if ((lead & 0xE0) == 0xC0) {
+         announced = 2;
+     } else if ((lead & 0xF0) == 0xE0) {
+         announced = 3;
+     } else if ((lead & 0xF8) == 0xF0) {
+         announced = 4;
+     } else {
+         rust_error_at(get_current_location(), "invalid UTF-8 (too long)");
+         return 0;
+     }
+
+     // each trailing byte must have the form 10xxxxxx; stop peeking at the first that doesn't
+     for (int offset = 1; offset < announced; offset++) {
+         const uint8_t trailing = peek_input(n_start_offset + offset);
+         if ((trailing & 0xC0) != 0x80)
+             return 0;
+     }
+
+     return announced;
+ }
-}
-// peeks the codepoint input at n codepoints ahead of current codepoint - try
-// not to use
-Codepoint
-Lexer::test_peek_codepoint_input (int n)
-{
- int totalOffset = 0;
-
- // add up all offsets into total offset? does this do what I want?
- for (int i = 0; i < n; i++)
- {
- totalOffset += test_get_input_codepoint_n_length (totalOffset);
+ /* Meant to peek the codepoint input at n codepoints ahead of current codepoint - try not to
+  * use. NOT IMPLEMENTED: after measuring the codepoints in front of the target it always
+  * fails an assertion. */
+ Codepoint Lexer::test_peek_codepoint_input(int n) {
+     // byte offset of the target: sum of the byte lengths of the n codepoints before it
+     // (a malformed sequence measures 0, so the walk does not advance past it)
+     int byte_offset = 0;
+     int measured = 0;
+     while (measured < n) {
+         byte_offset += test_get_input_codepoint_n_length(byte_offset);
+         ++measured;
+     }
+     // issues: this would have (at least) O(n) lookup time, not O(1) like the rest?
+
+     // TODO: implement if still needed. The disabled draft that used to follow only repeated
+     // the decoding done by test_peek_codepoint_input() at offset 0 and never used the
+     // offset computed above.
+
+     // error out of function as it is not implemented
+     rust_assert(1 == 0);
+     return { 0 };
+ }
- // issues: this would have (at least) O(n) lookup time, not O(1) like the
- // rest?
-
- // TODO: implement if still needed
-
- // error out of function as it is not implemented
- gcc_assert (1 == 0);
- return {0};
- /*
- uint8_t input = peek_input();
-
- if (input < 128) {
- // ascii -- 1 byte
- return input;
- } else if ((input & 0xC0) == 0x80) {
- // invalid (continuation; can't be first char)
- return 0xFFFE;
- } else if ((input & 0xE0) == 0xC0) {
- // 2 bytes
- uint8_t input2 = peek_input(1);
- if ((input2 & 0xC0) != 0x80)
- return 0xFFFE;
-
- uint32_t output = ((input & 0x1F) << 6) | ((input2 & 0x3F) << 0);
- return output;
- } else if ((input & 0xF0) == 0xE0) {
- // 3 bytes
- uint8_t input2 = peek_input(1);
- if ((input2 & 0xC0) != 0x80)
- return 0xFFFE;
-
- uint8_t input3 = peek_input(2);
- if ((input3 & 0xC0) != 0x80)
- return 0xFFFE;
-
- uint32_t output
- = ((input & 0x0F) << 12) | ((input2 & 0x3F) << 6) | ((input3 &
- 0x3F) << 0); return output; } else if ((input & 0xF8) == 0xF0) {
- // 4 bytes
- uint8_t input2 = peek_input(1);
- if ((input2 & 0xC0) != 0x80)
- return 0xFFFE;
-
- uint8_t input3 = peek_input(2);
- if ((input3 & 0xC0) != 0x80)
- return 0xFFFE;
-
- uint8_t input4 = peek_input(3);
- if ((input4 & 0xC0) != 0x80)
- return 0xFFFE;
-
- uint32_t output = ((input & 0x07) << 18) | ((input2 & 0x3F) << 12)
- | ((input3 & 0x3F) << 6) | ((input4 & 0x3F) <<
- 0); return output; } else { rust_error_at(get_current_location(), "invalid
- UTF-8 (too long)"); return 0xFFFE;
- }*/
}
-} // namespace Rust
diff --git a/gcc/rust/rust-session-manager.cc b/gcc/rust/rust-session-manager.cc
index 2247519..8e9e4a3 100644
--- a/gcc/rust/rust-session-manager.cc
+++ b/gcc/rust/rust-session-manager.cc
@@ -5,6 +5,7 @@
#include "target.h"
#include "tm.h"
+#include "tm_p.h"
#include "rust-lex.h"
#include "rust-parse.h"
@@ -13,799 +14,764 @@
#include <algorithm>
-extern Linemap *
-rust_get_linemap ();
+extern Linemap* rust_get_linemap();
namespace Rust {
-// Simple wrapper for FILE* that simplifies destruction.
-struct RAIIFile
-{
- FILE *file;
-
- RAIIFile (const char *filename) : file (fopen (filename, "r")) {}
-
- ~RAIIFile () { fclose (file); }
-};
-
-// Implicitly enable a target_feature (and recursively enable dependencies).
-void
-Session::implicitly_enable_feature (::std::string feature_name)
-{
- // TODO: is this really required since features added would be complete via
- // target spec?
-
- if (!options.target_data.has_key_value_pair ("target_data", feature_name))
- {
- // if feature has dependencies, enable them
- if (feature_name == "aes")
- {
- implicitly_enable_feature ("sse2");
- }
- else if (feature_name == "avx")
- {
- implicitly_enable_feature ("sse4.2");
- }
- else if (feature_name == "avx2")
- {
- implicitly_enable_feature ("avx");
- }
- else if (feature_name == "fma")
- {
- implicitly_enable_feature ("avx");
- }
- else if (feature_name == "pclmulqdq")
- {
- implicitly_enable_feature ("sse2");
- }
- else if (feature_name == "sha")
- {
- implicitly_enable_feature ("sse2");
- }
- else if (feature_name == "sse2")
- {
- implicitly_enable_feature ("sse");
- }
- else if (feature_name == "sse3")
- {
- implicitly_enable_feature ("sse2");
- }
- else if (feature_name == "sse4.1")
- {
- implicitly_enable_feature ("sse3");
- }
- else if (feature_name == "sse4.2")
- {
- implicitly_enable_feature ("sse4.1");
- }
- else if (feature_name == "ssse3")
- {
- implicitly_enable_feature ("sse3");
- }
-
- options.target_data.insert_key_value_pair ("target_feature",
- ::std::move (feature_name));
+ // Simple wrapper for FILE* that simplifies destruction: opens filename for reading and
+ // closes the stream when the wrapper is destroyed. file is NULL when the open failed, so
+ // callers must check it before use.
+ struct RAIIFile {
+     FILE* file;
+
+     RAIIFile(const char* filename) : file(fopen(filename, "r")) {}
+
+     // A copy would leave two wrappers closing the same stream.
+     RAIIFile(const RAIIFile&) = delete;
+     RAIIFile& operator=(const RAIIFile&) = delete;
+
+     ~RAIIFile() {
+         // passing a null stream to fclose is undefined behaviour, so skip it if fopen failed
+         if (file != NULL)
+             fclose(file);
+     }
+ };
+
+ // Implicitly enable a target_feature (and recursively enable dependencies).
+ // A feature that is already recorded under "target_feature" is left alone, so neither it nor
+ // its dependencies are inserted a second time.
+ void Session::implicitly_enable_feature(::std::string feature_name) {
+     // TODO: is this really required since features added would be complete via target spec?
+
+     // Features are inserted under the "target_feature" key below, so that is the key that
+     // has to be queried here for the already-enabled check to ever succeed.
+     if (options.target_data.has_key_value_pair("target_feature", feature_name))
+         return;
+
+     // feature -> the single feature it directly depends on
+     static const struct {
+         const char* feature;
+         const char* prerequisite;
+     } dependencies[] = {
+         { "aes", "sse2" },
+         { "avx", "sse4.2" },
+         { "avx2", "avx" },
+         { "fma", "avx" },
+         { "pclmulqdq", "sse2" },
+         { "sha", "sse2" },
+         { "sse2", "sse" },
+         { "sse3", "sse2" },
+         { "sse4.1", "sse3" },
+         { "sse4.2", "sse4.1" },
+         { "ssse3", "sse3" },
+     };
+
+     // if feature has dependencies, enable them first (the recursion walks the whole chain)
+     for (const auto& dependency : dependencies) {
+         if (feature_name == dependency.feature) {
+             implicitly_enable_feature(dependency.prerequisite);
+             break;
+         }
+     }
+
+     options.target_data.insert_key_value_pair("target_feature", ::std::move(feature_name));
+ }
-}
-// Meant to enable all target features. As this will be done by target hook,
-// this method's deprecated.
-void
-Session::enable_features ()
-{
- bool has_target_crt_static = false;
- const char *target = "PLACEHOLDER";
-
- fprintf (
- stderr,
- "ERROR: Somewhere in call chain Session::enable_features is called.\n");
-
- if (has_target_crt_static)
- {
- // enable "crt-static" attribute
+ // Meant to enable all target features. As this will be done by target hook, this method's
+ // deprecated: it enables nothing and only reports on stderr that it was reached.
+ void Session::enable_features() {
+     fprintf(stderr, "ERROR: Somewhere in call chain Session::enable_features is called.\n");
+
+     /* TODO: do this via target hook. have one for each target that implicitly enables the
+      * features for that platform. Would probably have to make custom target hook. */
+
+     /* Work that was sketched here but never enabled:
+      *  - enabling the "crt-static" attribute when the target has it
+      *  - for target "x86" / "x86_64": testing one TARGET_ISA_* macro per feature and calling
+      *    implicitly_enable_feature() with the name on the right:
+      *      AES -> "aes"            AVX -> "sse4.2"       AVX2 -> "avx"
+      *      BMI -> "bmi1"           BMI2 -> "bmi2"        FMA -> "fma"
+      *      FXSR -> "fxsr"          LZCNT -> "lzcnt"      VPCLMULQDQ -> "pclmulqdq"
+      *      POPCNT -> "popcnt"      RDRND -> "rdrand"     RDSEED -> "rdseed"
+      *      SHA -> "sha"            SSE -> "sse"          SSE2 -> "sse2"
+      *      SSE3 -> "sse3"          SSE4_1 -> "sse4.1"    SSE4_2 -> "sse4.2"
+      *      SSSE3 -> "ssse3"        XSAVE -> "xsave"      XSAVEC -> "xsavec"
+      *      XSAVEOPT -> "xsaveopt"  XSAVES -> "xsaves"
+      *    followed by shrink_to_fit() and a ::std::sort of options.target_data.features. */
+ }
- /* TODO: do this via target hook. have one for each target that implicitly
- * enables the
- * features for that platform. Would probably have to make custom target hook.
- */
-
- /*
- if (target == "x86" || target == "x86_64") {
- if (TARGET_ISA_AES) {
- // enable aes, implicitly enable sse2
- implicitly_enable_feature("aes");
- }
-
- if (TARGET_ISA_AVX) {
- // enable avx, implicitly enable sse4.2
- implicitly_enable_feature("sse4.2");
- }
-
- if (TARGET_ISA_AVX2) {
- // enable avx2, implicitly enable avx
- implicitly_enable_feature("avx");
- }
-
- if (TARGET_ISA_BMI) {
- // enable bmi1
- implicitly_enable_feature("bmi1");
- }
-
- if (TARGET_ISA_BMI2) {
- // enable bmi2
- implicitly_enable_feature("bmi2");
- }
-
- if (TARGET_ISA_FMA) {
- // enable fma, implicitly enable avx
- implicitly_enable_feature("fma");
- }
-
- if (TARGET_ISA_FXSR) {
- // enable fxsr
- implicitly_enable_feature("fxsr");
- }
-
- if (TARGET_ISA_LZCNT) {
- // enable lzcnt
- implicitly_enable_feature("lzcnt");
- }
-
- if (TARGET_ISA_VPCLMULQDQ) {
- // enable pclmulqdq, implicitly enable sse2
- implicitly_enable_feature("pclmulqdq");
- }
-
- if (TARGET_ISA_POPCNT) {
- // enable popcnt
- implicitly_enable_feature("popcnt");
- }
-
- if (TARGET_ISA_RDRND) {
- // enable rdrand
- implicitly_enable_feature("rdrand");
- }
-
- if (TARGET_ISA_RDSEED) {
- // enable rdseed
- implicitly_enable_feature("rdseed");
- }
-
- if (TARGET_ISA_SHA) {
- // enable sha, implicitly enable sse2
- implicitly_enable_feature("sha");
- }
-
- if (TARGET_ISA_SSE) {
- // enable sse
- implicitly_enable_feature("sse");
- }
-
- if (TARGET_ISA_SSE2) {
- // enable sse2, implicitly enable sse
- implicitly_enable_feature("sse2");
- }
-
- if (TARGET_ISA_SSE3) {
- // enable sse3, implicitly enable sse2
- implicitly_enable_feature("sse3");
- }
-
- if (TARGET_ISA_SSE4_1) {
- // enable sse4.1, implicitly enable sse3
- implicitly_enable_feature("sse4.1");
- }
-
- if (TARGET_ISA_SSE4_2) {
- // enable sse4.2, implicitly enable sse4.1
- implicitly_enable_feature("sse4.2");
- }
-
- if (TARGET_ISA_SSSE3) {
- // enable ssse3, implicitly enable sse3
- implicitly_enable_feature("ssse3");
- }
-
- if (TARGET_ISA_XSAVE) {
- // enable xsave
- implicitly_enable_feature("xsave");
- }
-
- if (TARGET_ISA_XSAVEC) {
- // enable xsavec
- implicitly_enable_feature("xsavec");
- }
-
- if (TARGET_ISA_XSAVEOPT) {
- // enable xsaveopt
- implicitly_enable_feature("xsaveopt");
- }
-
- if (TARGET_ISA_XSAVES) {
- // enable xsaves
- implicitly_enable_feature("xsaves");
- }
- }
- options.target_data.features.shrink_to_fit();
- ::std::sort(options.target_data.features.begin(),
- options.target_data.features.end());*/
-}
-
-void
-Session::init ()
-{
- // nothing yet
-}
-
-// Initialise default options. Actually called before handle_option, unlike init
-// itself.
-void
-Session::init_options ()
-{
- options.dump_option = CompileOptions::NO_DUMP;
-}
-
-// Handle option selection.
-bool
-Session::handle_option (
- enum opt_code code, const char *arg, HOST_WIDE_INT value ATTRIBUTE_UNUSED,
- int kind ATTRIBUTE_UNUSED, location_t loc ATTRIBUTE_UNUSED,
- const struct cl_option_handlers *handlers ATTRIBUTE_UNUSED)
-{
- // used to store whether results of various stuff are successful
- bool ret = true;
-
- // Handles options as listed in lang.opt.
- switch (code)
- {
- case OPT_I:
- // TODO: add search path
- break;
- case OPT_L:
- // TODO: add library link path or something
- break;
- case OPT_frust_dump_:
- // enable dump and return whether this was successful
- if (arg != NULL)
- {
- ret = enable_dump (::std::string (arg));
- }
- else
- {
- ret = false;
- }
- break;
- // no option handling for -o
- default:
- // return 1 to indicate option is valid
- break;
+ void Session::init() { // Populates options.target_data: target macro output first, then pointer width, endianness and derived values.
+#ifndef TARGET_RUST_OS_INFO // give the OS macro an empty expansion if nothing has defined it
+# define TARGET_RUST_OS_INFO()
+#endif
+//#define builtin_rust_info(KEY, VALUE) rust_add_target_info (KEY, VALUE)
+// might as well use c++ stuff: while defined, builtin_rust_info(KEY, VALUE) inserts the pair straight into options.target_data
+#define builtin_rust_info(KEY, VALUE) options.target_data.insert_key_value_pair(KEY, VALUE)
+
+ // initialise target hooks
+ //targetrustm.rust_cpu_info();
+ //targetrustm.rust_os_info();
+ // ok, that's not working too well, so the macros are invoked directly instead. TODO - see if can salvage old implementation
+ TARGET_RUST_CPU_INFO(); // NOTE(review): no empty fallback is defined in this function for this macro - presumably every target's headers provide it; confirm
+ TARGET_RUST_OS_INFO(); // presumably both expansions call builtin_rust_info - verify against the target headers
+
+#undef builtin_rust_info
+
+ // target-independent values that should exist in all targets
+ options.target_data.insert_key_value_pair("target_pointer_width", std::to_string(POINTER_SIZE)); // POINTER_SIZE rendered as a decimal string
+ options.target_data.insert_key_value_pair("target_endian", BYTES_BIG_ENDIAN ? "big" : "little");
+
+ // TODO: find min atomic width and max atomic width
+ // from it, add atomic-related stuff for sizes 8, 16, 32, 64, and 128 (if inside bounds)
+ // in rustc, min atomic width is a known quantity (or 8 if not known), and max is also a known quantity (or is pointer size if not known)
+ // TODO: add atomic pointer if some criteria is satisfied
+
+ // TODO: find whether target has "atomic cas"
+
+ // add debug_assertions if enabled and proc_macro if crate type has it or whatever
+
+ // derived values from hook (computed last, after every pair above has been inserted)
+ options.target_data.init_derived_values();
+ }
- return ret;
-}
-
-/* Enables a certain dump depending on the name passed in. Returns true if name
- * is valid, false otherwise. */
-bool
-Session::enable_dump (::std::string arg)
-{
- // FIXME: change dumping algorithm when new non-inhibiting dump system is
- // created
- if (arg == "all")
- {
- error_at (
- UNKNOWN_LOCATION,
- "dumping all is not supported as of now. choose 'lex' or 'parse'");
- return false;
- }
- else if (arg == "lex")
- {
- options.dump_option = CompileOptions::LEXER_DUMP;
- }
- else if (arg == "parse")
- {
- options.dump_option = CompileOptions::PARSER_AST_DUMP;
- }
- else if (arg == "register_plugins")
- {
- options.dump_option = CompileOptions::REGISTER_PLUGINS_DUMP;
+ // Initialise default options (currently just "no dump selected"). Actually called before handle_option, unlike init itself.
+ void Session::init_options() {
+ options.dump_option = CompileOptions::NO_DUMP; // enable_dump overwrites this when a valid dump name is given
+ }
- else if (arg == "injection")
- {
- options.dump_option = CompileOptions::INJECTION_DUMP;
- }
- else if (arg == "expansion")
- {
- options.dump_option = CompileOptions::EXPANSION_DUMP;
- }
- else if (arg == "name_resolution")
- {
- options.dump_option = CompileOptions::NAME_RESOLUTION_DUMP;
- }
- else if (arg == "")
- {
- error_at (UNKNOWN_LOCATION,
- "dump option was not given a name. choose 'lex' or 'parse'");
- return false;
- }
- else
- {
- error_at (UNKNOWN_LOCATION,
- "dump option '%s' was unrecognised. choose 'lex' or 'parse'",
- arg.c_str ());
- return false;
- }
- return true;
-}
-/* Actual main entry point for front-end. Called from langhook to parse files.
- */
-void
-Session::parse_files (int num_files, const char **files)
-{
- for (int i = 0; i < num_files; i++)
- {
- parse_file (files[i]);
+ /* Handle option selection. Returns whether the option was handled successfully: false only
+  * for OPT_frust_dump_ given without an argument or with a name enable_dump rejects, true
+  * for every other option code. */
+ bool Session::handle_option(enum opt_code code, const char* arg,
+   HOST_WIDE_INT value ATTRIBUTE_UNUSED, int kind ATTRIBUTE_UNUSED,
+   location_t loc ATTRIBUTE_UNUSED, const struct cl_option_handlers* handlers ATTRIBUTE_UNUSED) {
+     // Handles options as listed in lang.opt.
+     switch (code) {
+         case OPT_frust_dump_:
+             // enable dump and return whether this was successful; a missing argument fails
+             // without ever building a string from the null pointer
+             return arg != NULL && enable_dump(::std::string(arg));
+         case OPT_I:
+             // TODO: add search path
+             return true;
+         case OPT_L:
+             // TODO: add library link path or something
+             return true;
+         default:
+             // no option handling for -o (or anything else); report the option as valid
+             return true;
+     }
+ }
- // TODO: should semantic analysis be dealed with here? or per file? for now,
- // per-file.
-}
-
-// Parses a single file with filename filename.
-void
-Session::parse_file (const char *filename)
-{
- RAIIFile file_wrap (filename);
- if (file_wrap.file == NULL)
- {
- fatal_error (UNKNOWN_LOCATION, "cannot open filename %s: %m", filename);
+ /* Selects the dump named by arg by storing the matching value in options.dump_option.
+  * Returns true if the name is valid; otherwise reports an error and returns false, leaving
+  * options.dump_option untouched. */
+ bool Session::enable_dump(::std::string arg) {
+     // FIXME: change dumping algorithm when new non-inhibiting dump system is created
+
+     // names that are rejected with their own message
+     if (arg == "all") {
+         error_at(
+           UNKNOWN_LOCATION, "dumping all is not supported as of now. choose 'lex' or 'parse'");
+         return false;
+     }
+     if (arg == "") {
+         error_at(UNKNOWN_LOCATION, "dump option was not given a name. choose 'lex' or 'parse'");
+         return false;
+     }
+
+     // accepted names and the dump each one selects
+     using DumpMode = decltype(options.dump_option);
+     static const struct {
+         const char* name;
+         DumpMode mode;
+     } known_dumps[] = {
+         { "lex", CompileOptions::LEXER_DUMP },
+         { "parse", CompileOptions::PARSER_AST_DUMP },
+         { "register_plugins", CompileOptions::REGISTER_PLUGINS_DUMP },
+         { "injection", CompileOptions::INJECTION_DUMP },
+         { "expansion", CompileOptions::EXPANSION_DUMP },
+         { "name_resolution", CompileOptions::NAME_RESOLUTION_DUMP },
+         // special case - dumps all target options. Option handling is called before init,
+         // so this has to be an actual compile option rather than an immediate dump.
+         { "target_options", CompileOptions::TARGET_OPTION_DUMP },
+     };
+
+     for (const auto& candidate : known_dumps) {
+         if (arg == candidate.name) {
+             options.dump_option = candidate.mode;
+             return true;
+         }
+     }
+
+     error_at(UNKNOWN_LOCATION, "dump option '%s' was unrecognised. choose 'lex' or 'parse'",
+       arg.c_str());
+     return false;
+ }
- // parse file here
- // create lexer and parser - these are file-specific and so aren't instance
- // variables
- Rust::Lexer lex (filename, file_wrap.file, rust_get_linemap ());
- Rust::Parser parser (lex);
-
- // determine parsing method from options
- /* FIXME: currently, the dump means that full compilation will not occur as of
- * present. In future, dumps should not inhibit full compilation. */
- switch (options.dump_option)
- {
- case CompileOptions::NO_DUMP:
- fatal_error (UNKNOWN_LOCATION,
- "no-dump parsing has not been enabled yet");
- return;
- case CompileOptions::LEXER_DUMP:
- parser.debug_dump_lex_output ();
- return;
- case CompileOptions::PARSER_AST_DUMP:
- parser.debug_dump_ast_output ();
- return;
- case CompileOptions::REGISTER_PLUGINS_DUMP:
- case CompileOptions::INJECTION_DUMP:
- case CompileOptions::EXPANSION_DUMP:
- case CompileOptions::NAME_RESOLUTION_DUMP:
- // will break later after more stages
- break;
- // semantic analysis when completed
- default:
- fatal_error (UNKNOWN_LOCATION, "unrecognised dump option: '%u'",
- options.dump_option);
- return;
+ /* Actual main entry point for front-end. Called from langhook to parse files: hands the
+  * first num_files entries of files to parse_file, one at a time and in array order. */
+ void Session::parse_files(int num_files, const char** files) {
+     int index = 0;
+     while (index < num_files) {
+         parse_file(files[index]);
+         ++index;
+     }
+     // TODO: should semantic analysis be dealed with here? or per file? for now, per-file.
+ }
- /* basic pipeline:
- * - lex
- * - parse
- * - register plugins (dummy stage for now) - attribute injection? what is
- * this? (attribute injection is injecting attributes specified in command
- * line into crate root)
- * - injection (some lint checks or dummy, register builtin macros, crate
- * injection)
- * - expansion (expands all macros, maybe build test harness, AST validation,
- * maybe macro crate)
- * - name resolution (name resolution, maybe feature checking, maybe buffered
- * lints)
- * TODO not done */
-
- // generate crate from parser
- AST::Crate parsed_crate = parser.parse_crate ();
-
- fprintf (stderr, "\033[0;31mSUCCESSFULLY PARSED CRATE \n\033[0m");
-
- // register plugins pipeline stage
- register_plugins (parsed_crate);
- fprintf (stderr, "\033[0;31mSUCCESSFULLY REGISTERED PLUGINS \n\033[0m");
-
- if (options.dump_option == CompileOptions::REGISTER_PLUGINS_DUMP)
- {
- // TODO: what do I dump here?
- return;
- }
-
- // injection pipeline stage
- injection (parsed_crate);
- fprintf (stderr, "\033[0;31mSUCCESSFULLY FINISHED INJECTION \n\033[0m");
-
- if (options.dump_option == CompileOptions::INJECTION_DUMP)
- {
- // TODO: what do I dump here? injected crate names?
- return;
+ // Parses a single file with filename filename.
+ // Opens the file, builds a lexer and parser for it, then either performs the dump selected by
+ // options.dump_option or runs the pipeline stages in order (parse, register plugins, injection,
+ // expansion, name resolution), returning right after the stage whose dump option is selected.
+ // A file that cannot be opened is reported through fatal_error.
+ void Session::parse_file(const char* filename) {
+ RAIIFile file_wrap(filename);
+
+ if (file_wrap.file == NULL) {
+ fatal_error(UNKNOWN_LOCATION, "cannot open filename %s: %m", filename);
+ }
+
+ // parse file here
+ // create lexer and parser - these are file-specific and so aren't instance variables
+ Rust::Lexer lex(filename, file_wrap.file, rust_get_linemap());
+ Rust::Parser parser(lex);
+
+ // determine parsing method from options
+ /* FIXME: currently, the dump means that full compilation will not occur as of present. In
+ * future, dumps should not inhibit full compilation. */
+ switch (options.dump_option) {
+ case CompileOptions::NO_DUMP:
+ // compiling without a dump option is not supported yet, so it is reported as a fatal error
+ fatal_error(UNKNOWN_LOCATION, "no-dump parsing has not been enabled yet");
+ return;
+ case CompileOptions::LEXER_DUMP:
+ parser.debug_dump_lex_output();
+ return;
+ case CompileOptions::PARSER_AST_DUMP:
+ parser.debug_dump_ast_output();
+ return;
+ case CompileOptions::REGISTER_PLUGINS_DUMP:
+ case CompileOptions::INJECTION_DUMP:
+ case CompileOptions::EXPANSION_DUMP:
+ case CompileOptions::NAME_RESOLUTION_DUMP:
+ // will break later after more stages
+ break;
+ // semantic analysis when completed
+ case CompileOptions::TARGET_OPTION_DUMP:
+ // dumps the target options only; the file is not parsed for this option
+ options.target_data.dump_target_options();
+ return;
+ default:
+ fatal_error(UNKNOWN_LOCATION, "unrecognised dump option: '%u'", options.dump_option);
+ return;
+ }
+
+ /* basic pipeline:
+ * - lex
+ * - parse
+ * - register plugins (dummy stage for now) - attribute injection? what is this?
+ * (attribute injection is injecting attributes specified in command line into crate root)
+ * - injection (some lint checks or dummy, register builtin macros, crate injection)
+ * - expansion (expands all macros, maybe build test harness, AST validation, maybe macro
+ * crate)
+ * - name resolution (name resolution, maybe feature checking, maybe buffered lints)
+ * TODO not done */
+
+ // generate crate from parser
+ AST::Crate parsed_crate = parser.parse_crate();
+
+ // progress messages below go to stderr wrapped in ANSI colour escape sequences
+ fprintf(stderr, "\033[0;31mSUCCESSFULLY PARSED CRATE \n\033[0m");
+
+ // register plugins pipeline stage
+ register_plugins(parsed_crate);
+ fprintf(stderr, "\033[0;31mSUCCESSFULLY REGISTERED PLUGINS \n\033[0m");
+
+ if (options.dump_option == CompileOptions::REGISTER_PLUGINS_DUMP) {
+ // TODO: what do I dump here?
+ return;
+ }
+
+ // injection pipeline stage
+ injection(parsed_crate);
+ fprintf(stderr, "\033[0;31mSUCCESSFULLY FINISHED INJECTION \n\033[0m");
+
+ if (options.dump_option == CompileOptions::INJECTION_DUMP) {
+ // TODO: what do I dump here? injected crate names?
+ return;
+ }
+
+ // expansion pipeline stage
+ expansion(parsed_crate);
+ fprintf(stderr, "\033[0;31mSUCCESSFULLY FINISHED EXPANSION \n\033[0m");
+
+ if (options.dump_option == CompileOptions::EXPANSION_DUMP) {
+ // TODO: what do I dump here? expanded macros? AST with expanded macros?
+ return;
+ }
+
+ // name resolution pipeline stage
+ name_resolution(parsed_crate);
+ fprintf(stderr, "\033[0;31mSUCCESSFULLY FINISHED NAME RESOLUTION \n\033[0m");
+
+ if (options.dump_option == CompileOptions::NAME_RESOLUTION_DUMP) {
+ // TODO: what do I dump here? resolved names? AST with resolved names?
+ return;
+ }
}
- // expansion pipeline stage
- expansion (parsed_crate);
- fprintf (stderr, "\033[0;31mSUCCESSFULLY FINISHED EXPANSION \n\033[0m");
+ // Checks whether 'cfg' attribute prevents compilation.
+ // Currently a stub: attr is marked ATTRIBUTE_UNUSED, is never read, and the function always
+ // returns true.
+ bool check_cfg(const AST::Attribute& attr ATTRIBUTE_UNUSED) {
+ // if "has sub items", and if 'cfg' attr, recursively call this on sub items?
- if (options.dump_option == CompileOptions::EXPANSION_DUMP)
- {
- // TODO: what do I dump here? expanded macros? AST with expanded macros?
- return;
- }
-
- // name resolution pipeline stage
- name_resolution (parsed_crate);
- fprintf (stderr, "\033[0;31mSUCCESSFULLY FINISHED NAME RESOLUTION \n\033[0m");
+ // TODO: actually implement. assume true for now
- if (options.dump_option == CompileOptions::NAME_RESOLUTION_DUMP)
- {
- // TODO: what do I dump here? resolved names? AST with resolved names?
- return;
+ return true;
}
-}
+ // TODO: deprecated - don't use
-// Checks whether 'cfg' attribute prevents compilation.
-bool
-check_cfg (const AST::Attribute &attr ATTRIBUTE_UNUSED)
-{
- // if "has sub items", and if 'cfg' attr, recursively call this on sub items?
+ // Checks whether any 'cfg' attribute on the item prevents compilation of that item.
+ // Returns false as soon as an attribute whose path equals "cfg" is rejected by check_cfg, and
+ // true if no such attribute is found.
+ // NOTE(review): attrs is taken by value, so the whole vector is copied on every call; a const
+ // reference would avoid that if all declarations of this function can be changed together.
+ bool check_item_cfg(::std::vector<AST::Attribute> attrs) {
+ for (const auto& attr : attrs) {
+ if (attr.get_path() == "cfg" && !check_cfg(attr)) {
+ return false;
+ }
+ }
- // TODO: actually implement. assume true for now
-
- return true;
-}
-// TODO: deprecated - don't use
-
-// Checks whether any 'cfg' attribute on the item prevents compilation of that
-// item.
-bool
-check_item_cfg (::std::vector<AST::Attribute> attrs)
-{
- for (const auto &attr : attrs)
- {
- if (attr.get_path () == "cfg" && !check_cfg (attr))
- {
- return false;
- }
+ return true;
}
-
- return true;
-}
-// TODO: deprecated - don't use
-
-// TODO: actually implement method
-void
-load_extern_crate (::std::string crate_name ATTRIBUTE_UNUSED)
-{}
-// TODO: deprecated - don't use
-
-// Parses up to the "load (external) crates" part of the frontend.
-// TODO: lots of this code is probably actually useful outside of dumping, so
-// maybe split off function
-void
-Session::debug_dump_load_crates (Parser &parser)
-{
- // parse crate as AST
- AST::Crate crate = parser.parse_crate ();
-
- /* TODO: search through inner attrs and see whether any of those attr paths
- * contain "no_core", "no_std", "compiler_builtins". If so/not, save certain
- * crate names. In these names, insert items at beginning of crate items. This
- * is crate injection. Also, inject prelude use decl at beginning (first name
- * is assumed to be prelude - prelude is a use decl automatically generated to
- * enable using Option and Copy without qualifying it or importing it via
- * 'use' manually) */
-
- ::std::vector< ::std::string> crate_names;
- for (const auto &item : crate.items)
- {
- // if item is extern crate, add name? to list of stuff ONLY IF config is
- // checked if item is module, iterate this loop inside it as well
- // (recursive?) ONLY IF config is checked
-
- // TODO: actually do the checks somewhere - probably in the items
-
- item->add_crate_name (crate_names);
+ // TODO: deprecated - don't use
+
+ // TODO: actually implement method
+ // Placeholder for loading an external crate by name: the body is empty and crate_name is
+ // unused, so every call is currently a no-op.
+ void load_extern_crate(::std::string crate_name ATTRIBUTE_UNUSED) {}
+ // TODO: deprecated - don't use
+
+ // Runs the front-end up to the "load (external) crates" step: parses the crate, gathers crate
+ // names from its items, and calls load_extern_crate on each gathered name.
+ // TODO: much of this is probably useful outside of dumping, so it may be worth splitting into a
+ // separate function
+ void Session::debug_dump_load_crates(Parser& parser) {
+     // build the AST for the crate
+     AST::Crate parsed = parser.parse_crate();
+
+     /* TODO: look through the inner attrs for paths containing "no_core", "no_std" or
+      * "compiler_builtins". Depending on which are present, save certain crate names and insert
+      * items for them at the beginning of the crate items (this is crate injection). Also inject
+      * the prelude use decl at the beginning (the first name is assumed to be the prelude - a use
+      * decl generated automatically so that Option and Copy can be used without qualifying them
+      * or importing them via 'use' manually) */
+
+     ::std::vector< ::std::string> extern_names;
+     for (const auto& entry : parsed.items) {
+         // extern crate items: add name? to the list ONLY IF config is checked
+         // module items: run this loop inside them as well (recursive?) ONLY IF config is checked
+
+         // TODO: actually do the checks somewhere - probably in the items
+
+         entry->add_crate_name(extern_names);
+     }
+
+     /* try to load every collected crate name/path/whatever. Should loaded crates be saved to a
+      * Session variable, or to the current AST::Crate? */
+     for (const auto& extern_name : extern_names) {
+         load_extern_crate(extern_name /*, basename = ""?*/);
+     }
+     // dependencies of each loaded crate would need loading as well
}
+ // TODO: deprecated - don't use
- /* loop through list of crate names/paths/whatever, attempting to load each
- * one. save loaded crates to a Session variable? Or save to current
- * AST::Crate? */
- for (const auto &name : crate_names)
- {
- load_extern_crate (name /*, basename = ""?*/);
+ // Plugin registration pipeline stage. No plugins are registered yet: crate is unused and the
+ // function only logs to stderr that it ran.
+ void Session::register_plugins(AST::Crate& crate ATTRIBUTE_UNUSED) {
+     fputs("ran register_plugins (with no body)\n", stderr);
}
- // for each loaded crate, load dependencies of it as well
-}
-// TODO: deprecated - don't use
-void
-Session::register_plugins (AST::Crate &crate ATTRIBUTE_UNUSED)
-{
- fprintf (stderr, "ran register_plugins (with no body)\n");
-}
+ // TODO: move somewhere else
+ // Returns true if any attribute in attrs has a path that compares equal to name, and false
+ // otherwise (including when attrs is empty).
+ bool contains_name(const std::vector<AST::Attribute>& attrs, std::string name) {
+ for (const auto& attr : attrs) {
+ if (attr.get_path() == name) {
+ return true;
+ }
+ }
-// TODO: move somewhere else
-bool
-contains_name (::std::vector<AST::Attribute> attrs, ::std::string name)
-{
- for (const auto &attr : attrs)
- {
- if (attr.get_path () == name)
- {
- return true;
- }
+ return false;
}
- return false;
-}
-
-void
-Session::injection (AST::Crate &crate)
-{
- fprintf (stderr, "started injection\n");
-
- // lint checks in future maybe?
-
- // register builtin macros
- /* In rustc, builtin macros are divided into 3 categories depending on use -
- * "bang" macros, "attr" macros, and "derive" macros. I think the meanings of
- * these categories should be fairly obvious to anyone who has used rust.
- * Builtin macro list by category: Bang
- * - asm
- * - assert
- * - cfg
- * - column
- * - compile_error
- * - concat_idents
- * - concat
- * - env
- * - file
- * - format_args_nl
- * - format_args
- * - global_asm
- * - include_bytes
- * - include_str
- * - include
- * - line
- * - log_syntax
- * - module_path
- * - option_env
- * - stringify
- * - trace_macros
- * Attr
- * - bench
- * - global_allocator
- * - test
- * - test_case
- * Derive
- * - Clone
- * - Copy
- * - Debug
- * - Default
- * - Eq
- * - Hash
- * - Ord
- * - PartialEq
- * - PartialOrd
- * - RustcDecodable
- * - RustcEncodable
- * rustc also has a "quote" macro that is defined differently and is
- * supposedly not stable so eh. */
- /* TODO: actually implement injection of these macros. In particular, derive
- * macros, cfg, and
- * test should be prioritised since they seem to be used the most. */
-
- // crate injection
- ::std::vector< ::std::string> names;
- if (contains_name (crate.inner_attrs, "no_core"))
- {
- // no prelude
- injected_crate_name = "";
+ /* Injection pipeline stage. Chooses the implicitly linked crates from the crate's inner
+  * attributes ("no_core", "no_std", "compiler_builtins"), records the prelude crate name in
+  * injected_crate_name, and inserts the matching `extern crate` items plus the prelude `use`
+  * declaration at the beginning of crate.items. With "no_core" nothing is injected.
+  * Registration of builtin macros is not implemented yet. */
+ void Session::injection(AST::Crate& crate) {
+     fprintf(stderr, "started injection\n");
+
+     // lint checks in future maybe?
+
+     // register builtin macros
+     /* In rustc, builtin macros are divided into 3 categories depending on use - "bang" macros,
+      * "attr" macros, and "derive" macros. I think the meanings of these categories should be
+      * fairly obvious to anyone who has used rust. Builtin macro list by category: Bang
+      *      - asm
+      *      - assert
+      *      - cfg
+      *      - column
+      *      - compile_error
+      *      - concat_idents
+      *      - concat
+      *      - env
+      *      - file
+      *      - format_args_nl
+      *      - format_args
+      *      - global_asm
+      *      - include_bytes
+      *      - include_str
+      *      - include
+      *      - line
+      *      - log_syntax
+      *      - module_path
+      *      - option_env
+      *      - stringify
+      *      - trace_macros
+      *  Attr
+      *      - bench
+      *      - global_allocator
+      *      - test
+      *      - test_case
+      *  Derive
+      *      - Clone
+      *      - Copy
+      *      - Debug
+      *      - Default
+      *      - Eq
+      *      - Hash
+      *      - Ord
+      *      - PartialEq
+      *      - PartialOrd
+      *      - RustcDecodable
+      *      - RustcEncodable
+      * rustc also has a "quote" macro that is defined differently and is supposedly not stable so
+      * eh. */
+     /* TODO: actually implement injection of these macros. In particular, derive macros, cfg, and
+      * test should be prioritised since they seem to be used the most. */
+
+     // crate injection
+     ::std::vector< ::std::string> names;
+     // whether a prelude `use` declaration should be injected at all
+     bool inject_prelude = true;
+     if (contains_name(crate.inner_attrs, "no_core")) {
+         // no prelude
+         injected_crate_name = "";
+         inject_prelude = false;
+     } else if (contains_name(crate.inner_attrs, "no_std")) {
+         names.push_back("core");
+
+         if (!contains_name(crate.inner_attrs, "compiler_builtins")) {
+             names.push_back("compiler_builtins");
+         }
+
+         injected_crate_name = "core";
+     } else {
+         names.push_back("std");
+
+         injected_crate_name = "std";
+     }
+
+     // reverse iterate through names to insert crate items in "forward" order at beginning of
+     // crate
+     for (auto it = names.rbegin(); it != names.rend(); ++it) {
+         // create "macro use" attribute for use on extern crate item to enable loading macros from
+         // it
+         AST::Attribute attr(AST::SimplePath::from_str("macro_use"), nullptr);
+
+         // create "extern crate" item with the name
+         ::std::unique_ptr<AST::ExternCrate> extern_crate(
+           new AST::ExternCrate(*it, AST::Visibility::create_error(), { ::std::move(attr) },
+             Linemap::unknown_location()));
+
+         // insert at beginning
+         crate.items.insert(crate.items.begin(), ::std::move(extern_crate));
+     }
+
+     // With "no_core" there is no crate to take a prelude from, so skip the prelude import
+     // rather than building a `use` path whose first segment is the empty string.
+     if (inject_prelude) {
+         // create use tree path
+         // prelude is injected_crate_name
+         ::std::vector<AST::SimplePathSegment> segments
+           = { AST::SimplePathSegment(injected_crate_name), AST::SimplePathSegment("prelude"),
+               AST::SimplePathSegment("v1") };
+         // create use tree and decl
+         ::std::unique_ptr<AST::UseTreeGlob> use_tree(new AST::UseTreeGlob(
+           AST::UseTreeGlob::PATH_PREFIXED, AST::SimplePath(::std::move(segments)), Location()));
+         AST::Attribute prelude_attr(AST::SimplePath::from_str("prelude_import"), nullptr);
+         ::std::unique_ptr<AST::UseDeclaration> use_decl(
+           new AST::UseDeclaration(::std::move(use_tree), AST::Visibility::create_error(),
+             { ::std::move(prelude_attr) }, Location()));
+
+         // inserted last so that the prelude import ends up before the extern crate items
+         crate.items.insert(crate.items.begin(), ::std::move(use_decl));
+     }
+
+     /* TODO: potentially add checking attribute crate type? I can't figure out what this does
+      * currently comment says "Unconditionally collect crate types from attributes to make them
+      * used", which presumably refers to checking the linkage info by "crate_type". It also seems
+      * to ensure that an invalid crate type is not specified, so maybe just do that. Valid crate
+      * types: bin lib dylib staticlib cdylib rlib proc-macro */
+
+     fprintf(stderr, "finished injection\n");
}
- else if (contains_name (crate.inner_attrs, "no_std"))
- {
- names.push_back ("core");
- if (!contains_name (crate.inner_attrs, "compiler_builtins"))
- {
- names.push_back ("compiler_builtins");
- }
+ // Expansion pipeline stage. Currently a stub: crate is unused and the function only logs its
+ // start and end to stderr; the comments in the body outline the steps still to be implemented.
+ void Session::expansion(AST::Crate& crate ATTRIBUTE_UNUSED) {
+ fprintf(stderr, "started expansion\n");
- injected_crate_name = "core";
- }
- else
- {
- names.push_back ("std");
+ // rustc has a modification to windows PATH temporarily here, which may end up being required
- injected_crate_name = "std";
- }
-
- // reverse iterate through names to insert crate items in "forward" order
- // at beginning of crate
- for (auto it = names.rbegin (); it != names.rend (); ++it)
- {
- // create "macro use" attribute for use on extern crate item to enable
- // loading macros from it
- AST::Attribute attr (AST::SimplePath::from_str ("macro_use"), NULL);
-
- // create "extern crate" item with the name
- ::std::unique_ptr<AST::ExternCrate> extern_crate (
- new AST::ExternCrate (*it, AST::Visibility::create_error (),
- {::std::move (attr)},
- Linemap::unknown_location ()));
-
- // insert at beginning
- crate.items.insert (crate.items.begin (), ::std::move (extern_crate));
- }
+ // create macro expansion config?
+ // if not, would at least have to configure recursion_limit
- // create use tree path
- // prelude is injected_crate_name
- ::std::vector<AST::SimplePathSegment> segments
- = {AST::SimplePathSegment (injected_crate_name),
- AST::SimplePathSegment ("prelude"), AST::SimplePathSegment ("v1")};
- // create use tree and decl
- ::std::unique_ptr<AST::UseTreeGlob> use_tree (
- new AST::UseTreeGlob (AST::UseTreeGlob::PATH_PREFIXED,
- AST::SimplePath (::std::move (segments)),
- Location ()));
- AST::Attribute prelude_attr (AST::SimplePath::from_str ("prelude_import"),
- NULL);
- ::std::unique_ptr<AST::UseDeclaration> use_decl (
- new AST::UseDeclaration (::std::move (use_tree),
- AST::Visibility::create_error (),
- {::std::move (prelude_attr)}, Location ()));
-
- crate.items.insert (crate.items.begin (), ::std::move (use_decl));
-
- /* TODO: potentially add checking attribute crate type? I can't figure out
- * what this does currently comment says "Unconditionally collect crate
- * types from attributes to make them used", which presumably refers to
- * checking the linkage info by "crate_type". It also seems to ensure that
- * an invalid crate type is not specified, so maybe just do that. Valid
- * crate types: bin lib dylib staticlib cdylib rlib proc-macro */
-
- fprintf (stderr, "finished injection\n");
-}
+ // create extctxt? from parse session, cfg, and resolver?
+ // expand by calling extctxt object's monotonic_expander's expand_crate method.
-void
-Session::expansion (AST::Crate &crate ATTRIBUTE_UNUSED)
-{
- fprintf (stderr, "started expansion\n");
+ // error reporting - check unused macros, get missing fragment specifiers
- // rustc has a modification to windows PATH temporarily here, which may end
- // up being required
+ // build test harness
- // create macro expansion config?
- // if not, would at least have to configure recursion_limit
+ // ast validation (also with proc macro decls)
- // create extctxt? from parse session, cfg, and resolver?
- // expand by calling cxtctxt object's monotonic_expander's expand_crate
- // method.
+ // maybe create macro crate if not rustdoc
- // error reporting - check unused macros, get missing fragment specifiers
+ fprintf(stderr, "finished expansion\n");
+ }
- // build test harness
+ // Name resolution pipeline stage. Currently a stub: crate is unused and the function only logs
+ // its start and end to stderr.
+ void Session::name_resolution(AST::Crate& crate ATTRIBUTE_UNUSED) {
+ fprintf(stderr, "started name resolution\n");
- // ast validation (also with proc macro decls)
+ fprintf(stderr, "finished name resolution\n");
+ }
- // maybe create macro crate if not rustdoc
+ // Writes every entry of features to stderr between a start and an end banner: one
+ // `key: "value"` line per value, or the bare key when the key has no values. If there are no
+ // entries at all, a single "No target options available!" line is written instead.
+ void TargetOptions::dump_target_options() const {
+     fprintf(stderr, "\033[0;31m--PREPARING TO DUMP ALL TARGET OPTIONS--\n\033[0m");
+
+     if (features.empty()) {
+         fprintf(stderr, "No target options available!\n");
+     }
+
+     for (auto entry = features.begin(); entry != features.end(); ++entry) {
+         const auto& key = entry->first;
+         const auto& values = entry->second;
+
+         // a key without values is printed on its own
+         if (values.empty()) {
+             fprintf(stderr, "%s\n", key.c_str());
+             continue;
+         }
+
+         for (const auto& value : values) {
+             fprintf(stderr, "%s: \"%s\"\n", key.c_str(), value.c_str());
+         }
+     }
+
+     fprintf(stderr, "\033[0;31m--END OF TARGET OPTION DUMP--\n\033[0m");
+ }
- fprintf (stderr, "finished expansion\n");
-}
+ /* Derives additional target options from the ones already present:
+  *  - a "target_family" value of "unix" or "windows" also defines the bare key of that name;
+  *  - every "target_feature" value currently set gets its implied features enabled as well.
+  * The implication rules live only in enable_implicit_feature_reqs, so any feature it knows
+  * about (including "fma", which the previous hard-coded list here missed) is covered. */
+ void TargetOptions::init_derived_values() {
+     // enable derived values based on target families
+     if (has_key_value_pair("target_family", "unix"))
+         insert_key("unix");
+     if (has_key_value_pair("target_family", "windows"))
+         insert_key("windows");
+
+     // implicitly enable features
+     // get_values_for_key returns a copy, so the feature set can safely grow while iterating.
+     // Features without implied requirements are unaffected: they are already present.
+     for (const auto& feature : get_values_for_key("target_feature"))
+         enable_implicit_feature_reqs(feature);
+ }
-void
-Session::name_resolution (AST::Crate &crate ATTRIBUTE_UNUSED)
-{
- fprintf (stderr, "started name resolution\n");
+ /* Enables the given x86 "target_feature" value together with every feature it implies.
+  * Recurses into the directly implied feature first (e.g. avx -> sse4.2 -> sse4.1 -> ...),
+  * then inserts feature itself if it is not already present. Features with no known
+  * implication are simply inserted. */
+ void TargetOptions::enable_implicit_feature_reqs(std::string feature) {
+     if (feature == "aes")
+         enable_implicit_feature_reqs("sse2");
+     else if (feature == "avx")
+         enable_implicit_feature_reqs("sse4.2");
+     else if (feature == "avx2")
+         enable_implicit_feature_reqs("avx");
+     else if (feature == "fma")
+         enable_implicit_feature_reqs("avx");
+     else if (feature == "pclmulqdq")
+         enable_implicit_feature_reqs("sse2");
+     else if (feature == "sha")
+         enable_implicit_feature_reqs("sse2");
+     else if (feature == "sse2")
+         enable_implicit_feature_reqs("sse");
+     else if (feature == "sse3")
+         enable_implicit_feature_reqs("sse2");
+     else if (feature == "sse4.1")
+         // sse4.1 implies ssse3 (which in turn implies sse3); going straight to sse3 skipped ssse3
+         enable_implicit_feature_reqs("ssse3");
+     else if (feature == "sse4.2")
+         enable_implicit_feature_reqs("sse4.1");
+     else if (feature == "ssse3")
+         enable_implicit_feature_reqs("sse3");
+
+     // finally make sure the requested feature itself is recorded
+     if (!has_key_value_pair("target_feature", feature))
+         insert_key_value_pair("target_feature", feature);
+ }
- fprintf (stderr, "finished name resolution\n");
+ // NOTEs:
+ /* mrustc compile pipeline:
+ * - target load (pass target spec to parser?)
+ * - parse (convert source to AST)
+ * - load crates (load any explicitly mentioned extern crates [not all of them])
+ * - expand (AST transformations from attributes and macros, loads remaining extern crates
+ * [std/core and any triggered by macro expansion])
+ * - implicit crates (test harness, allocator crate, panic crate)
+ * - resolve use (annotate every 'use' item with source [supposedly handles nasty recursion])
+ * - resolve index (generate index of visible items for every module [avoids recursion in next
+ * pass])
+ * - resolve absolute (resolve all paths into either variable names [types/values] or absolute
+ * paths)
+ * - HIR lower (convert modified AST to simpler HIR [both expressions and module tree])
+ * - resolve type aliases (replace any usages of type aliases with actual type [except associated
+ * types])
+ * - resolve bind (iterate HIR tree and set binding annotations on all concrete types [avoids
+ * path lookups later])
+ * - resolve HIR markings (generate "markings" [e.g. for Copy/Send/Sync/...] for all types)
+ * - sort impls (small pass - sort impls into groups)
+ * - resolve UFCS outer (determine source trait for all top-level <T>::Type [qualified] paths)
+ * - resolve UFCS paths (do the same, but include for exprs this time. also normalises results of
+ * previous pass [expanding known associated types])
+ * - constant evaluate (evaluate all constants)
+ * - typecheck outer (checks impls are sane)
+ * - typecheck expressions (resolve and check types for all exprs)
+ * - expand HIR annotate (annotate how exprs are used - used for closure extractions and
+ * reborrows)
+ * - expand HIR closures (extract closures into structs implementing Fn* traits)
+ * - expand HIR vtables (generate vtables for types with dyn dispatch)
+ * - expand HIR calls (converts method and callable calls into explicit function calls)
+ * - expand HIR reborrows (apply reborrow rules [taking '&mut *v' instead of 'v'])
+ * - expand HIR erasedtype (replace all erased types 'impl Trait' with the true type)
+ * - typecheck expressions (validate - double check that previous passes haven't broken type
+ * system rules)
+ * - lower MIR (convert HIR exprs into a control-flow graph [MIR])
+ * - MIR validate (check that the generated MIR is consistent)
+ * - MIR cleanup (perform various transformations on MIR - replace reads of const items with the
+ * item itself; convert casts to unsized types into 'MakeDst' operations)
+ * - MIR optimise (perform various simple optimisations on the MIR - constant propagation, dead
+ * code elimination, borrow elimination, some inlining)
+ * - MIR validate PO (re-validate the MIR)
+ * - MIR validate full (optionally: perform expensive state-tracking validation on MIR)
+ * - trans enumerate (enumerate all items needed for code generation, primarily types used for
+ * generics)
+ * - trans auto impls (create magic trait impls as enumerated in previous pass)
+ * - trans monomorph (generate monomorphised copies of all functions [with generics replaced with
+ * real types])
+ * - MIR optimise inline (run optimisation again, this time with full type info [primarily for
+ * inlining])
+ * - HIR serialise (write out HIR dump [module tree and generic/inline MIR])
+ * - trans codegen (generate final output file: emit C source file and call C compiler) */
+
+ /* rustc compile pipeline (basic, in way less detail):
+ * - parse input (parse .rs to AST)
+ * - name resolution, macro expansion, and configuration (process AST recursively, resolving
+ * paths, expanding macros, processing #[cfg] nodes [i.e. maybe stripping stuff from AST])
+ * - lower to HIR
+ * - type check and other analyses (e.g. privacy checking)
+ * - lower to MIR and post-processing (and do stuff like borrow checking)
+ * - translation to LLVM IR and LLVM optimisations (produce the .o files)
+ * - linking (link together .o files) */
+
+ /* Pieced-together rustc compile pipeline (from source):
+ * - parse input (parse file to crate)
+ * - register plugins (attributes injection, set various options, register lints, load plugins)
+ * - expansion/configure and expand (initial 'cfg' processing, 'loading compiler plugins',
+ * syntax expansion, secondary 'cfg' expansion, synthesis of a test harness if required,
+ * injection of any std lib dependency and prelude, and name resolution) - actually documented
+ * inline
+ * - seemingly pieced-together order: pre-AST expansion lint checks, registering builtin
+ * macros, crate injection, then expand all macros, then maybe build test harness, AST validation,
+ * maybe create a macro crate (if not rustdoc), name resolution, complete gated feature
+ * checking, add all buffered lints
+ * - create global context (lower to HIR)
+ * - analysis on global context (HIR optimisations? create MIR?)
+ * - code generation
+ * - link */
}
-
-// NOTEs:
-/* mrustc compile pipeline:
- * - target load (pass target spec to parser?)
- * - parse (convert source to AST)
- * - load crates (load any explicitly mentioned extern crates [not all of
- * them])
- * - expand (AST transformations from attributes and macros, loads remaining
- * extern crates [std/core and any triggered by macro expansion])
- * - implicit crates (test harness, allocator crate, panic crate)
- * - resolve use (annotate every 'use' item with source [supposedly handles
- * nasty recursion])
- * - resolve index (generate index of visible items for every module [avoids
- * recursion in next pass])
- * - resolve absolute (resolve all paths into either variable names
- * [types/values] or absolute paths)
- * - HIR lower (convert modified AST to simpler HIR [both expressions and
- * module tree])
- * - resolve type aliases (replace any usages of type aliases with actual
- * type [except associated types])
- * - resolve bind (iterate HIR tree and set binding annotations on all
- * concrete types [avoids path lookups later])
- * - resolve HIR markings (generate "markings" [e.g. for Copy/Send/Sync/...]
- * for all types
- * - sort impls (small pass - sort impls into groups)
- * - resolve UFCS outer (determine source trait for all top-level <T>::Type
- * [qualified] paths)
- * - resolve UFCS paths (do the same, but include for exprs this time. also
- * normalises results of previous pass [expanding known associated types])
- * - constant evaluate (evaluate all constants)
- * - typecheck outer (checks impls are sane)
- * - typecheck expressions (resolve and check types for all exprs)
- * - expand HIR annotate (annotate how exprs are used - used for closure
- * extractions and reborrows)
- * - expand HIR closures (extract closures into structs implementing Fn*
- * traits)
- * - expand HIR vtables (generate vtables for types with dyn dispatch)
- * - expand HIR calls (converts method and callable calls into explicit
- * function calls)
- * - expand HIR reborrows (apply reborrow rules [taking '&mut *v' instead of
- * 'v'])
- * - expand HIR erasedtype (replace all erased types 'impl Trait' with the
- * true type)
- * - typecheck expressions (validate - double check that previous passes
- * haven't broke type system rules)
- * - lower MIR (convert HIR exprs into a control-flow graph [MIR])
- * - MIR validate (check that the generated MIR is consistent)
- * - MIR cleanup (perform various transformations on MIR - replace reads of
- * const items with the item itself; convert casts to unsized types into
- * 'MakeDst' operations)
- * - MIR optimise (perform various simple optimisations on the MIR - constant
- * propagation, dead code elimination, borrow elimination, some inlining)
- * - MIR validate PO (re-validate the MIR)
- * - MIR validate full (optionally: perform expensive state-tracking
- * validation on MIR)
- * - trans enumerate (enumerate all items needed for code generation,
- * primarily types used for generics)
- * - trans auto impls (create magic trait impls as enumerated in previous
- * pass)
- * - trans monomorph (generate monomorphised copies of all functions [with
- * generics replaced with real types])
- * - MIR optimise inline (run optimisation again, this time with full type
- * info [primarily for inlining])
- * - HIR serialise (write out HIR dump [module tree and generic/inline MIR])
- * - trans codegen (generate final output file: emit C source file and call C
- * compiler) */
-
-/* rustc compile pipeline (basic, in way less detail):
- * - parse input (parse .rs to AST)
- * - name resolution, macro expansion, and configuration (process AST
- * recursively, resolving paths, expanding macros, processing #[cfg] nodes
- * [i.e. maybe stripping stuff from AST])
- * - lower to HIR
- * - type check and other analyses (e.g. privacy checking)
- * - lower to MIR and post-processing (and do stuff like borrow checking)
- * - translation to LLVM IR and LLVM optimisations (produce the .o files)
- * - linking (link together .o files) */
-
-/* Pierced-together rustc compile pipeline (from source):
- * - parse input (parse file to crate)
- * - register plugins (attributes injection, set various options, register
- * lints, load plugins)
- * - expansion/configure and expand (initial 'cfg' processing, 'loading
- * compiler plugins', syntax expansion, secondary 'cfg' expansion, synthesis
- * of a test harness if required, injection of any std lib dependency and
- * prelude, and name resolution) - actually documented inline
- * - seeming pierced-together order: pre-AST expansion lint checks,
- * registering builtin macros, crate injection, then expand all macros, then
- * maybe build test harness, AST validation, maybe create a macro crate (if
- * not rustdoc), name resolution, complete gated feature checking, add all
- * buffered lints
- * - create global context (lower to HIR)
- * - analysis on global context (HIR optimisations? create MIR?)
- * - code generation
- * - link */
-} // namespace Rust
diff --git a/gcc/rust/rust-session-manager.h b/gcc/rust/rust-session-manager.h
index 1ce3a92..ee43232 100644
--- a/gcc/rust/rust-session-manager.h
+++ b/gcc/rust/rust-session-manager.h
@@ -18,206 +18,184 @@
#include <utility>
namespace Rust {
-// parser forward decl
-class Parser;
-// crate forward decl
-namespace AST {
-struct Crate;
+ // parser forward decl
+ class Parser;
+ // crate forward decl
+ namespace AST {
+ struct Crate;
+ }
+
+ // Data related to target, most useful for conditional compilation and whatever.
+ struct TargetOptions {
+ // TODO: maybe make private and access through helpers to allow changes to impl
+ std::unordered_map<std::string, std::unordered_set<std::string>> features;
+
+ public:
+ // Returns whether a key is defined in the feature set.
+ bool has_key(std::string key) const {
+ return features.find(key) != features.end();
+ }
+
+ // Returns whether a key exists with the given value in the feature set.
+ bool has_key_value_pair(std::string key, std::string value) const {
+ auto it = features.find(key);
+ if (it != features.end()) {
+ auto set = it->second;
+ auto it2 = set.find(value);
+ if (it2 != set.end())
+ return true;
+ }
+ return false;
+ }
+
+ // Returns the singular value from the key, or if the key has multiple, an empty string.
+ std::string get_singular_value(std::string key) const {
+ auto it = features.find(key);
+ if (it != features.end()) {
+ auto set = it->second;
+ if (set.size() == 1)
+ return *set.begin();
+ }
+ return "";
+ }
+
+ // Returns all values associated with a key (including none), or an empty set if no key is found.
+ std::unordered_set< ::std::string> get_values_for_key(std::string key) const {
+ auto it = features.find(key);
+ if (it != features.end()) {
+ return it->second;
+ }
+ return {};
+ }
+
+ /* Inserts a key (no value) into the feature set. This will do nothing if the key already exists.
+ * This returns whether the insertion was successful (i.e. whether key already existed). */
+ bool insert_key(std::string key) {
+ return features.insert(std::make_pair(key, std::unordered_set<std::string>())).second;
+ }
+
+ // Inserts a key-value pair into the feature set.
+ void insert_key_value_pair(std::string key, std::string value) {
+ auto existing_set = get_values_for_key(key);
+ existing_set.insert(std::move(value));
+ features[std::move(key)] = std::move(existing_set);
+ }
+
+ // Dump all target options to stderr.
+ void dump_target_options() const;
+
+ // Creates derived values and implicit enables after all target info is added (e.g. "unix").
+ void init_derived_values();
+
+ // Enables all requirements for the feature given, and will enable feature itself if not enabled.
+ void enable_implicit_feature_reqs(std::string feature);
+
+ /* According to reference, Rust uses either multi-map key-values or just values (although
+ * values may be aliases for a key-value value). This seems like overkill. Thus, depending on
+ * whether the attributes used in cfg are fixed or not, I think I'll either put each
+ * non-multimap "key-value" as a separate field and have the multimap "key-values" in a
+ * regular map for that one key, or actually use a multimap.
+ *
+ * rustc itself uses a set of key-value tuples where the second tuple element is optional.
+ * This gets rid of the requirement to make a multi-map, I guess, but seems like it might make
+ * search slow (unless all "is defined"-only ones have empty string as second element). */
+ /* cfg attributes:
+ * - target_arch: single value
+ * - target_feature: multiple values possible
+ * - target_os: single value
+ * - target_family: single value (or no value?)
+ * - unix: set when target_family = "unix"
+ * - windows: set when target_family = "windows"
+ * - if these are just syntactic sugar, then maybe have a separate set or map for this kind
+ * of stuff
+ * - target_env: set when needed for disambiguation about ABI - usually empty string for GNU,
+ * complicated
+ * - seems to be a single value (if any)
+ * - target_endian: single value; "little" or "big"
+ * - target_pointer_width: single value, "32" for 32-bit pointers, etc.
+ * - target_vendor, single value
+ * - test: set when testing is being done
+ * - again, seems similar to a "is defined" rather than "is equal to" like unix
+ * - debug_assertions: seems to "is defined"
+ * - proc_macro: no idea, bad docs. seems to be boolean, so maybe "is defined" */
+ };
+
+ // Defines compiler options (e.g. dump, etc.).
+ struct CompileOptions {
+ // TODO: use bitfield for smaller memory requirements?
+
+ // FIXME: this is set up for "instead of" dumping - in future, dumps should not inhibit
+ // compilation
+ enum DumpOptions {
+ NO_DUMP,
+ LEXER_DUMP,
+ PARSER_AST_DUMP,
+ REGISTER_PLUGINS_DUMP,
+ INJECTION_DUMP,
+ EXPANSION_DUMP,
+ NAME_RESOLUTION_DUMP,
+ TARGET_OPTION_DUMP,
+ // TODO: add more?
+ } dump_option;
+
+ // configuration options - actually useful for conditional compilation and whatever
+ // data related to target arch, features, os, family, env, endian, pointer width, vendor
+ TargetOptions target_data;
+ bool enable_test = false;
+ bool debug_assertions = false;
+ bool proc_macro = false;
+ };
+
+ /* Defines a compiler session. This is for a single compiler invocation, so potentially includes
+ * parsing multiple crates. */
+ struct Session {
+ CompileOptions options;
+ // This should really be in a per-crate storage area but it is wiped with every file so eh.
+ ::std::string injected_crate_name;
+
+ // backend wrapper to GCC GENERIC
+ Backend* backend;
+
+ // backend linemap
+ Linemap* linemap;
+
+ // TODO: replace raw pointers with smart pointers?
+
+ public:
+ /* Initialise compiler session. Corresponds to langhook grs_langhook_init(). Note that this is
+ * called after option handling. */
+ void init();
+ bool handle_option(enum opt_code code, const char* arg, HOST_WIDE_INT value, int kind,
+ location_t loc, const struct cl_option_handlers* handlers);
+ void parse_files(int num_files, const char** files);
+ void init_options();
+
+ private:
+ // TODO: should this be private or public?
+ void parse_file(const char* filename);
+ bool enable_dump(::std::string arg);
+
+ void debug_dump_load_crates(Parser& parser);
+
+ void implicitly_enable_feature(::std::string feature_name);
+ void enable_features();
+
+ // pipeline stages - TODO maybe move?
+ /* Register plugins pipeline stage. TODO maybe move to another object? Currently dummy stage.
+ * In future will handle attribute injection (top-level inner attribute creation from command
+ * line arguments), setting options maybe, registering lints maybe, loading plugins maybe. */
+ void register_plugins(AST::Crate& crate);
+ /* Injection pipeline stage. TODO maybe move to another object? Maybe have some lint checks
+ * (in future, obviously), register builtin macros, crate injection. */
+ void injection(AST::Crate& crate);
+ /* Expansion pipeline stage. TODO maybe move to another object? Expands all macros, maybe
+ * build test harness in future, AST validation, maybe create macro crate (if not rustdoc).*/
+ void expansion(AST::Crate& crate);
+ /* Name resolution pipeline stage. TODO maybe move to another object. Performs name
+ * resolution, maybe complete gated feature checking, maybe create buffered lints in future.
+ */
+ void name_resolution(AST::Crate& crate);
+ };
}
-// Data related to target, most useful for conditional compilation and whatever.
-struct TargetOptions
-{
- // TODO: maybe make private and access through helpers to allow changes to
- // impl
- std::unordered_map<std::string, std::unordered_set<std::string> > features;
-
-public:
- // Returns whether a key is defined in the feature set.
- bool has_key (std::string key) const
- {
- return features.find (key) != features.end ();
- }
-
- // Returns whether a key exists with the given value in the feature set.
- bool has_key_value_pair (std::string key, std::string value) const
- {
- auto it = features.find (key);
- if (it != features.end ())
- {
- auto set = it->second;
- auto it2 = set.find (value);
- if (it2 != set.end ())
- return true;
- }
- return false;
- }
-
- // Returns the singular value from the key, or if the key has multiple, an
- // empty string.
- std::string get_singular_value (std::string key) const
- {
- auto it = features.find (key);
- if (it != features.end ())
- {
- auto set = it->second;
- if (set.size () == 1)
- return *set.begin ();
- }
- return "";
- }
-
- // Returns all values associated with a key (including none), or an empty set
- // if no key is found.
- std::unordered_set< ::std::string> get_values_for_key (std::string key) const
- {
- auto it = features.find (key);
- if (it != features.end ())
- {
- return it->second;
- }
- return {};
- }
-
- /* Inserts a key (no value) into the feature set. This will do nothing if the
- * key already exists.
- * This returns whether the insertion was successful (i.e. whether key already
- * existed). */
- bool insert_key (std::string key)
- {
- return features
- .insert (std::make_pair (key, std::unordered_set<std::string> ()))
- .second;
- }
-
- // Inserts a key-value pair into the feature set.
- void insert_key_value_pair (std::string key, std::string value)
- {
- auto existing_set = get_values_for_key (key);
- existing_set.insert (std::move (value));
- features[std::move (key)] = std::move (existing_set);
- }
-
- /* According to reference, Rust uses either multi-map key-values or just
- * values (although values may be aliases for a key-value value). This seems
- * like overkill. Thus, depending on whether the attributes used in cfg are
- * fixed or not, I think I'll either put each non-multimap "key-value" as a
- * separate field and have the multimap "key-values" in a regular map for that
- * one key, or actually use a multimap.
- *
- * rustc itself uses a set of key-value tuples where the second tuple element
- * is optional. This gets rid of the requirement to make a multi-map, I guess,
- * but seems like it might make
- * search slow (unless all "is defined"-only ones have empty string as second
- * element). */
- /* cfg attributes:
- * - target_arch: single value
- * - target_feature: multiple values possible
- * - target_os: single value
- * - target_family: single value (or no value?)
- * - unix: set when target_family = "unix"
- * - windows: set when target_family = "windows"
- * - if these are just syntactic sugar, then maybe have a separate set or map
- * for this kind of stuff
- * - target_env: set when needed for disambiguation about ABI - usually empty
- * string for GNU, complicated
- * - seems to be a single value (if any)
- * - target_endian: single value; "little" or "big"
- * - target_pointer_width: single value, "32" for 32-bit pointers, etc.
- * - target_vendor, single value
- * - test: set when testing is being done
- * - again, seems similar to a "is defined" rather than "is equal to" like
- * unix
- * - debug_assertions: seems to "is defined"
- * - proc_macro: no idea, bad docs. seems to be boolean, so maybe "is defined"
- */
-};
-
-// Defines compiler options (e.g. dump, etc.).
-struct CompileOptions
-{
- // TODO: use bitfield for smaller memory requirements?
-
- // FIXME: this is set up for "instead of" dumping - in future, dumps should
- // not inhibit compilation
- enum DumpOptions
- {
- NO_DUMP,
- LEXER_DUMP,
- PARSER_AST_DUMP,
- REGISTER_PLUGINS_DUMP,
- INJECTION_DUMP,
- EXPANSION_DUMP,
- NAME_RESOLUTION_DUMP,
- // TODO: add more?
- } dump_option;
-
- // configuration options - actually useful for conditional compilation and
- // whatever data related to target arch, features, os, family, env, endian,
- // pointer width, vendor
- TargetOptions target_data;
- bool enable_test = false;
- bool debug_assertions = false;
- bool proc_macro = false;
-};
-
-/* Defines a compiler session. This is for a single compiler invocation, so
- * potentially includes parsing multiple crates. */
-struct Session
-{
- CompileOptions options;
- // This should really be in a per-crate storage area but it is wiped with
- // every file so eh.
- ::std::string injected_crate_name;
-
- // backend wrapper to GCC GENERIC
- Backend *backend;
-
- // backend linemap
- Linemap *linemap;
-
- // TODO: replace raw pointers with smart pointers?
-
-public:
- /* Initialise compiler session. Corresponds to langhook grs_langhook_init().
- * Note that this is called after option handling. */
- void init ();
- bool handle_option (enum opt_code code, const char *arg, HOST_WIDE_INT value,
- int kind, location_t loc,
- const struct cl_option_handlers *handlers);
- void parse_files (int num_files, const char **files);
- void init_options ();
-
-private:
- // TODO: should this be private or public?
- void parse_file (const char *filename);
- bool enable_dump (::std::string arg);
-
- void debug_dump_load_crates (Parser &parser);
-
- void implicitly_enable_feature (::std::string feature_name);
- void enable_features ();
-
- // pipeline stages - TODO maybe move?
- /* Register plugins pipeline stage. TODO maybe move to another object?
- * Currently dummy stage. In future will handle attribute injection (top-level
- * inner attribute creation from command line arguments), setting options
- * maybe, registering lints maybe, loading plugins maybe. */
- void register_plugins (AST::Crate &crate);
- /* Injection pipeline stage. TODO maybe move to another object? Maybe have
- * some lint checks (in future, obviously), register builtin macros, crate
- * injection. */
- void injection (AST::Crate &crate);
- /* Expansion pipeline stage. TODO maybe move to another object? Expands all
- * macros, maybe build test harness in future, AST validation, maybe create
- * macro crate (if not rustdoc).*/
- void expansion (AST::Crate &crate);
- /* Name resolution pipeline stage. TODO maybe move to another object. Performs
- * name resolution, maybe complete gated feature checking, maybe create
- * buffered lints in future.
- */
- void name_resolution (AST::Crate &crate);
-};
-} // namespace Rust
-
#endif
diff --git a/gcc/rust/rust-target.def b/gcc/rust/rust-target.def
index 3f375cb..13cf3e5 100644
--- a/gcc/rust/rust-target.def
+++ b/gcc/rust/rust-target.def
@@ -43,7 +43,7 @@ predefined by this hook apply to all files that are being compiled.",
/* Environmental OS info relating to the target OS. */
DEFHOOK
(/*d_os_versions*/rust_os_info,
- "Similarly to @code{TARGET_RUST_CPU_INFO}, but is used for configuration info\n\
+ "Similar to @code{TARGET_RUST_CPU_INFO}, but is used for configuration info\n\
relating to the target operating system.",
void, (void),
hook_void_void)