about | summary | refs | log | tree | commit | diff
path: root/llvm/lib/MC/MCParser/AsmLexer.cpp
diff options
context:
space:
mode:
author: Nirav Dave <niravd@google.com> 2016-06-16 21:19:23 +0000
committer: Nirav Dave <niravd@google.com> 2016-06-16 21:19:23 +0000
commit: 280ecf6ff0858e31ff0073dd3e43b7403d9ccd8e (patch)
tree: 437092926a4858c251406db57cb565f223c9337f /llvm/lib/MC/MCParser/AsmLexer.cpp
parent: 01e062f5c667296ba7e6558572b9de516bf141f1 (diff)
download: llvm-280ecf6ff0858e31ff0073dd3e43b7403d9ccd8e.zip
llvm-280ecf6ff0858e31ff0073dd3e43b7403d9ccd8e.tar.gz
llvm-280ecf6ff0858e31ff0073dd3e43b7403d9ccd8e.tar.bz2
Revert "Refactor and cleanup Assembly Parsing / Lexing"
Reverting for unexpected crashes on various platforms. This reverts commit r272953. llvm-svn: 272957
Diffstat (limited to 'llvm/lib/MC/MCParser/AsmLexer.cpp')
-rw-r--r-- llvm/lib/MC/MCParser/AsmLexer.cpp 137
1 files changed, 60 insertions, 77 deletions
diff --git a/llvm/lib/MC/MCParser/AsmLexer.cpp b/llvm/lib/MC/MCParser/AsmLexer.cpp
index d56071a..f653304 100644
--- a/llvm/lib/MC/MCParser/AsmLexer.cpp
+++ b/llvm/lib/MC/MCParser/AsmLexer.cpp
@@ -23,8 +23,7 @@ using namespace llvm;
AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) {
CurPtr = nullptr;
- IsAtStartOfLine = true;
- IsAtStartOfStatement = true;
+ isAtStartOfLine = true;
AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@");
}
@@ -51,9 +50,20 @@ AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) {
}
int AsmLexer::getNextChar() {
- if (CurPtr == CurBuf.end())
+ char CurChar = *CurPtr++;
+ switch (CurChar) {
+ default:
+ return (unsigned char)CurChar;
+ case 0:
+ // A nul character in the stream is either the end of the current buffer or
+ // a random nul in the file. Disambiguate that here.
+ if (CurPtr - 1 != CurBuf.end())
+ return 0; // Just whitespace.
+
+ // Otherwise, return end of file.
+ --CurPtr; // Another call to lex will return EOF again.
return EOF;
- return (unsigned char)*CurPtr++;
+ }
}
/// LexFloatLiteral: [0-9]*[.][0-9]*([eE][+-]?[0-9]*)?
@@ -159,52 +169,43 @@ AsmToken AsmLexer::LexIdentifier() {
AsmToken AsmLexer::LexSlash() {
switch (*CurPtr) {
case '*':
- IsAtStartOfStatement = false;
break; // C style comment.
case '/':
++CurPtr;
return LexLineComment();
default:
- IsAtStartOfStatement = false;
- return AsmToken(AsmToken::Slash, StringRef(TokStart, 1));
+ return AsmToken(AsmToken::Slash, StringRef(CurPtr - 1, 1));
}
// C Style comment.
++CurPtr; // skip the star.
- while (CurPtr != CurBuf.end()) {
- switch (*CurPtr++) {
+ while (1) {
+ int CurChar = getNextChar();
+ switch (CurChar) {
+ case EOF:
+ return ReturnError(TokStart, "unterminated comment");
case '*':
// End of the comment?
- if (*CurPtr != '/')
- break;
+ if (CurPtr[0] != '/') break;
+
++CurPtr; // End the */.
- return AsmToken(AsmToken::Comment,
- StringRef(TokStart, CurPtr - TokStart));
+ return LexToken();
}
}
- return ReturnError(TokStart, "unterminated comment");
}
/// LexLineComment: Comment: #[^\n]*
/// : //[^\n]*
AsmToken AsmLexer::LexLineComment() {
- // Mark This as an end of statement with a body of the
- // comment. While it would be nicer to leave this two tokens,
- // backwards compatability with TargetParsers makes keeping this in this form
- // better.
+ // FIXME: This is broken if we happen to a comment at the end of a file, which
+ // was .included, and which doesn't end with a newline.
int CurChar = getNextChar();
while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF)
CurChar = getNextChar();
- IsAtStartOfLine = true;
- // Whis is a whole line comment. leave newline
- if (IsAtStartOfStatement)
- return AsmToken(AsmToken::EndOfStatement,
- StringRef(TokStart, CurPtr - TokStart));
- IsAtStartOfStatement = true;
-
- return AsmToken(AsmToken::EndOfStatement,
- StringRef(TokStart, CurPtr - 1 - TokStart));
+ if (CurChar == EOF)
+ return AsmToken(AsmToken::Eof, StringRef(TokStart, 0));
+ return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 0));
}
static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
@@ -422,7 +423,8 @@ StringRef AsmLexer::LexUntilEndOfStatement() {
while (!isAtStartOfComment(CurPtr) && // Start of line comment.
!isAtStatementSeparator(CurPtr) && // End of statement marker.
- *CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) {
+ *CurPtr != '\n' && *CurPtr != '\r' &&
+ (*CurPtr != 0 || CurPtr != CurBuf.end())) {
++CurPtr;
}
return StringRef(TokStart, CurPtr-TokStart);
@@ -431,7 +433,8 @@ StringRef AsmLexer::LexUntilEndOfStatement() {
StringRef AsmLexer::LexUntilEndOfLine() {
TokStart = CurPtr;
- while (*CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) {
+ while (*CurPtr != '\n' && *CurPtr != '\r' &&
+ (*CurPtr != 0 || CurPtr != CurBuf.end())) {
++CurPtr;
}
return StringRef(TokStart, CurPtr-TokStart);
@@ -441,8 +444,7 @@ size_t AsmLexer::peekTokens(MutableArrayRef<AsmToken> Buf,
bool ShouldSkipSpace) {
const char *SavedTokStart = TokStart;
const char *SavedCurPtr = CurPtr;
- bool SavedAtStartOfLine = IsAtStartOfLine;
- bool SavedAtStartOfStatement = IsAtStartOfStatement;
+ bool SavedAtStartOfLine = isAtStartOfLine;
bool SavedSkipSpace = SkipSpace;
std::string SavedErr = getErr();
@@ -463,8 +465,7 @@ size_t AsmLexer::peekTokens(MutableArrayRef<AsmToken> Buf,
SetError(SavedErrLoc, SavedErr);
SkipSpace = SavedSkipSpace;
- IsAtStartOfLine = SavedAtStartOfLine;
- IsAtStartOfStatement = SavedAtStartOfStatement;
+ isAtStartOfLine = SavedAtStartOfLine;
CurPtr = SavedCurPtr;
TokStart = SavedTokStart;
@@ -494,45 +495,29 @@ AsmToken AsmLexer::LexToken() {
// This always consumes at least one character.
int CurChar = getNextChar();
- if (CurChar == '#' && IsAtStartOfStatement) {
- // If this starts with a '#', this may be a cpp
- // hash directive and otherwise a line comment.
- AsmToken TokenBuf[2];
- MutableArrayRef<AsmToken> Buf(TokenBuf, 2);
- size_t num = peekTokens(Buf, true);
- // There cannot be a space preceeding this
- if (IsAtStartOfLine && num == 2 && TokenBuf[0].is(AsmToken::Integer) &&
- TokenBuf[1].is(AsmToken::String)) {
- CurPtr = TokStart; // reset curPtr;
- StringRef s = LexUntilEndOfLine();
- UnLex(TokenBuf[1]);
- UnLex(TokenBuf[0]);
- return AsmToken(AsmToken::HashDirective, s);
- }
+ if (isAtStartOfComment(TokStart)) {
+ // If this comment starts with a '#', then return the Hash token and let
+ // the assembler parser see if it can be parsed as a cpp line filename
+ // comment. We do this only if we are at the start of a line.
+ if (CurChar == '#' && isAtStartOfLine)
+ return AsmToken(AsmToken::Hash, StringRef(TokStart, 1));
+ isAtStartOfLine = true;
return LexLineComment();
}
-
- if (isAtStartOfComment(TokStart))
- return LexLineComment();
-
if (isAtStatementSeparator(TokStart)) {
CurPtr += strlen(MAI.getSeparatorString()) - 1;
- IsAtStartOfLine = true;
- IsAtStartOfStatement = true;
return AsmToken(AsmToken::EndOfStatement,
StringRef(TokStart, strlen(MAI.getSeparatorString())));
}
// If we're missing a newline at EOF, make sure we still get an
// EndOfStatement token before the Eof token.
- if (CurChar == EOF && !IsAtStartOfStatement) {
- IsAtStartOfLine = true;
- IsAtStartOfStatement = true;
+ if (CurChar == EOF && !isAtStartOfLine) {
+ isAtStartOfLine = true;
return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1));
}
- IsAtStartOfLine = false;
- bool OldIsAtStartOfStatement = IsAtStartOfStatement;
- IsAtStartOfStatement = false;
+
+ isAtStartOfLine = false;
switch (CurChar) {
default:
// Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
@@ -541,24 +526,24 @@ AsmToken AsmLexer::LexToken() {
// Unknown character, emit an error.
return ReturnError(TokStart, "invalid character in input");
- case EOF:
- IsAtStartOfLine = true;
- IsAtStartOfStatement = true;
- return AsmToken(AsmToken::Eof, StringRef(TokStart, 0));
+ case EOF: return AsmToken(AsmToken::Eof, StringRef(TokStart, 0));
case 0:
case ' ':
case '\t':
- IsAtStartOfStatement = OldIsAtStartOfStatement;
- while (*CurPtr == ' ' || *CurPtr == '\t')
- CurPtr++;
- if (SkipSpace)
- return LexToken(); // Ignore whitespace.
- else
- return AsmToken(AsmToken::Space, StringRef(TokStart, CurPtr - TokStart));
- case '\n':
+ if (SkipSpace) {
+ // Ignore whitespace.
+ return LexToken();
+ } else {
+ int len = 1;
+ while (*CurPtr==' ' || *CurPtr=='\t') {
+ CurPtr++;
+ len++;
+ }
+ return AsmToken(AsmToken::Space, StringRef(TokStart, len));
+ }
+ case '\n': // FALL THROUGH.
case '\r':
- IsAtStartOfLine = true;
- IsAtStartOfStatement = true;
+ isAtStartOfLine = true;
return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1));
case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1));
case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1));
@@ -601,9 +586,7 @@ AsmToken AsmLexer::LexToken() {
}
return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1));
case '%': return AsmToken(AsmToken::Percent, StringRef(TokStart, 1));
- case '/':
- IsAtStartOfStatement = OldIsAtStartOfStatement;
- return LexSlash();
+ case '/': return LexSlash();
case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1));
case '\'': return LexSingleQuote();
case '"': return LexQuote();