1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
|
// Copyright (C) 2020-2023 Free Software Foundation, Inc.
// This file is part of GCC.
// GCC is free software; you can redistribute it and/or modify it under
// the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 3, or (at your option) any later
// version.
// GCC is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
// You should have received a copy of the GNU General Public License
// along with GCC; see the file COPYING3. If not see
// <http://www.gnu.org/licenses/>.
#ifndef RUST_LEX_H
#define RUST_LEX_H
#include "rust-linemap.h"
#include "rust-buffered-queue.h"
#include "rust-token.h"
#include "rust-optional.h"
namespace Rust {
// Move-only owner of a C `FILE *` that closes the stream on destruction.
//
// The wrapper never closes `stdin`, so a file opened through the special
// name "-" can be handed around safely.  An instance whose stream is null
// represents a failed open; query it with ok ().  The filename pointer is
// only borrowed: it is stored as given and never copied or freed here.
struct RAIIFile
{
private:
  // Owned stream, or nullptr when nothing is open.
  FILE *file = nullptr;
  // Borrowed name the stream was opened from; may be nullptr.
  const char *filename = nullptr;

  // Releases the stream if we own one.  stdin is deliberately left open.
  void close () noexcept
  {
    if (file != nullptr && file != stdin)
      fclose (file);
  }

  // Only regular files and character devices (e.g. /dev/null) are accepted
  // as lexer input.
  static bool allowed_filetype (const struct stat &statbuf)
  {
    return S_ISREG (statbuf.st_mode) || S_ISCHR (statbuf.st_mode);
  }

public:
  /**
   * Open FILENAME for reading, or wrap stdin when FILENAME is "-".
   *
   * On failure the object is left in the !ok () state and errno describes
   * the problem: the value set by fopen/fstat, or EISDIR when the path was
   * opened successfully but is neither a regular file nor a char device.
   */
  RAIIFile (const char *filename) : filename (filename)
  {
    if (strcmp (filename, "-") == 0)
      {
	file = stdin;
	return;
      }

    file = fopen (filename, "r");
    if (file == nullptr)
      return;

    struct stat statbuf;
    if (fstat (fileno (file), &statbuf) == -1)
      {
	// Keep fstat's own error code: fclose may clobber errno, and
	// reporting EISDIR here would be misleading.
	const int saved_errno = errno;
	fclose (file);
	file = nullptr;
	errno = saved_errno;
      }
    else if (!allowed_filetype (statbuf))
      {
	fclose (file);
	file = nullptr;
	errno = EISDIR;
      }
  }

  /**
   * Create a RAIIFile from an existing instance of FILE*.  Ownership of RAW
   * is taken over: it will be closed by this object (unless it is stdin).
   */
  RAIIFile (FILE *raw, const char *filename = nullptr)
    : file (raw), filename (filename)
  {}

  // Copying would lead to the same stream being closed twice.
  RAIIFile (const RAIIFile &other) = delete;
  RAIIFile &operator= (const RAIIFile &other) = delete;

  // The source must be emptied explicitly, otherwise its destructor would
  // fclose the stream that now belongs to the new object.
  RAIIFile (RAIIFile &&other) noexcept
    : file (other.file), filename (other.filename)
  {
    other.file = nullptr;
  }

  RAIIFile &operator= (RAIIFile &&other) noexcept
  {
    // Without this guard a self-move would close our own stream and then
    // null it out, silently turning a valid file into an error object.
    if (this != &other)
      {
	close ();
	file = other.file;
	filename = other.filename;
	other.file = nullptr;
      }
    return *this;
  }

  // Build an instance in the failed state (no stream, no filename).
  static RAIIFile create_error () { return RAIIFile (nullptr, nullptr); }

  ~RAIIFile () { close (); }

  // Underlying stream; still owned by this object.  nullptr when !ok ().
  FILE *get_raw () const { return file; }
  // Name passed at construction; may be nullptr.
  const char *get_filename () const { return filename; }

  // True when a stream is held.
  bool ok () const { return file != nullptr; }
};
// Lexer for Rust source text.  Characters are pulled from an InputSource
// (a FILE* or an in-memory string) through `input_queue`, and tokens built by
// build_token () are buffered in `token_queue` for lookahead by the consumer.
class Lexer
{
private:
  // Request new Location for current column in line_table
  Location get_current_location ();

  // Skips the current input char.
  void skip_input ();
  // Advances current input char to n + 1 chars ahead of current position.
  void skip_input (int n);

  // Peeks the current char.
  int peek_input ();
  // Returns char n chars ahead of current position.
  int peek_input (int n);

  // Classifies keyword (i.e. gets id for keyword).
  TokenId classify_keyword (const std::string &str);

  // Builds a token from the input queue.
  TokenPtr build_token ();

  // Literal/escape parsing helpers.
  // NOTE(review): the `int` carried in each result is presumably the number
  // of input chars consumed and the `bool` an error flag - confirm against
  // the definitions in the implementation file.
  std::tuple<std::string, int, bool> parse_in_decimal ();
  std::pair<std::string, int> parse_in_exponent_part ();
  std::pair<PrimitiveCoreType, int> parse_in_type_suffix ();
  std::tuple<char, int, bool> parse_escape (char opening_char);
  std::tuple<Codepoint, int, bool> parse_utf8_escape ();
  int parse_partial_string_continue ();
  std::pair<long, int> parse_partial_hex_escape ();
  std::pair<Codepoint, int> parse_partial_unicode_escape ();

  // Codepoint-level access to the input.
  int get_input_codepoint_length ();
  int test_get_input_codepoint_n_length (int n_start_offset);
  Codepoint peek_codepoint_input ();
  Codepoint test_peek_codepoint_input (int n);
  void skip_codepoint_input ();
  void skip_broken_string_input (int current_char);

  // Per-token-kind parsers; LOC is the location attached to the token.
  TokenPtr parse_byte_char (Location loc);
  TokenPtr parse_byte_string (Location loc);
  TokenPtr parse_raw_byte_string (Location loc);
  TokenPtr parse_raw_identifier (Location loc);
  TokenPtr parse_string (Location loc);
  TokenPtr maybe_parse_raw_string (Location loc);
  TokenPtr parse_raw_string (Location loc, int initial_hash_count);
  TokenPtr parse_non_decimal_int_literals (Location loc);
  TokenPtr parse_decimal_int_or_float (Location loc);
  TokenPtr parse_char_or_lifetime (Location loc);
  TokenPtr parse_identifier_or_keyword (Location loc);

  template <typename IsDigitFunc>
  TokenPtr parse_non_decimal_int_literal (Location loc,
					  IsDigitFunc is_digit_func,
					  std::string existent_str, int base);

public:
  // Construct lexer with input file and filename provided
  Lexer (const char *filename, RAIIFile input, Linemap *linemap,
	 Optional<std::ofstream &> dump_lex_opt
	 = Optional<std::ofstream &>::none ());

  // Lex the contents of a string instead of a file
  Lexer (const std::string &input);

  // dtor
  ~Lexer ();

  // don't allow copy semantics (for now, at least)
  Lexer (const Lexer &other) = delete;
  Lexer &operator= (const Lexer &other) = delete;

  // enable move semantics
  // NOTE(review): TokenSource stores a raw `Lexer *` and input_queue holds an
  // `InputSource &`; confirm that the defaulted move operations leave those
  // pointing at valid objects.
  Lexer (Lexer &&other) = default;
  Lexer &operator= (Lexer &&other) = default;

  // Returns token n tokens ahead of current position.
  const_TokenPtr peek_token (int n) { return token_queue.peek (n); }
  // Peeks the current token.
  const_TokenPtr peek_token () { return peek_token (0); }

  // Advances current token to n + 1 tokens ahead of current position.
  void skip_token (int n);
  // Skips the current token.
  void skip_token () { skip_token (0); }

  // Dumps and advances by n + 1 tokens.
  void dump_and_skip (int n);

  // Replaces the current token with a specified token.
  void replace_current_token (TokenPtr replacement);

  // FIXME: don't use anymore
  /* Splits the current token into two. Intended for use with nested generics
   * closes (i.e. T<U<X>> where >> is wrongly lexed as one token). Note that
   * this will only work with "simple" tokens like punctuation. */
  void split_current_token (TokenId new_left, TokenId new_right);

  Linemap *get_line_map () { return line_map; }

  // Name of the input file, or an empty string when the input has no name
  // (RAIIFile permits a null filename, and building a std::string from a
  // null pointer is undefined behaviour).
  std::string get_filename ()
  {
    const char *name = input.get_filename ();
    return name != nullptr ? std::string (name) : std::string ();
  }

private:
  void start_line (int current_line, int current_column);

  // File for use as input.
  RAIIFile input;
  // TODO is this actually required? could just have file storage in InputSource

  // Current line number.
  int current_line;
  // Current column number.
  int current_column;
  // Current character.
  int current_char;
  // Line map.
  Linemap *line_map;

  /* Max column number that can be quickly allocated - higher may require
   * allocating new linemap */
  static const int max_column_hint = 80;

  Optional<std::ofstream &> dump_lex_out;

  // Abstract character source feeding the lexer.
  class InputSource
  {
  public:
    virtual ~InputSource () {}

    // Returns the next char from the input stream; both implementations
    // below return EOF once the input is exhausted.
    virtual int next () = 0;
  };

  // Character source backed by a C stream.  The FILE* is not owned here.
  class FileInputSource : public InputSource
  {
  private:
    // Input source file.
    FILE *input;

  public:
    // Create new input source from file.
    FileInputSource (FILE *input) : input (input) {}

    int next () override { return fgetc (input); }
  };

  // Character source backed by an in-memory string.  Only a reference is
  // kept, so the string must outlive this object.
  class BufferInputSource : public InputSource
  {
  private:
    const std::string &buffer;
    size_t offs;

  public:
    // Create new input source from a string buffer, starting at OFFSET.
    BufferInputSource (const std::string &b, size_t offset)
      : buffer (b), offs (offset)
    {}

    int next () override
    {
      if (offs >= buffer.size ())
	return EOF;

      // Go through unsigned char, as fgetc does, so that bytes >= 0x80 are
      // not sign-extended into negative values (0xFF would otherwise be
      // indistinguishable from EOF where plain char is signed).
      return static_cast<unsigned char> (buffer.at (offs++));
    }
  };

  // The input source for the lexer.
  // InputSource input_source;
  // Input file queue.
  std::unique_ptr<InputSource> raw_input_source;
  buffered_queue<int, InputSource &> input_queue;

  // Adapter that lets token_queue pull tokens out of a Lexer.
  struct TokenSource
  {
    // The lexer object that will use this TokenSource.
    Lexer *lexer;

    // Create a new TokenSource with given lexer.
    TokenSource (Lexer *parLexer) : lexer (parLexer) {}

    // Builds the next token by delegating to the lexer.
    TokenPtr next () { return lexer->build_token (); }
  };

  // The token source for the lexer.
  // TokenSource token_source;
  // Token stream queue.
  buffered_queue<std::shared_ptr<Token>, TokenSource> token_queue;
};
} // namespace Rust
#endif
|