diff options
author | Nirav Dave <niravd@google.com> | 2016-06-16 21:19:23 +0000 |
---|---|---|
committer | Nirav Dave <niravd@google.com> | 2016-06-16 21:19:23 +0000 |
commit | 280ecf6ff0858e31ff0073dd3e43b7403d9ccd8e (patch) | |
tree | 437092926a4858c251406db57cb565f223c9337f /llvm/lib/MC/MCParser/AsmLexer.cpp | |
parent | 01e062f5c667296ba7e6558572b9de516bf141f1 (diff) | |
download | llvm-280ecf6ff0858e31ff0073dd3e43b7403d9ccd8e.zip llvm-280ecf6ff0858e31ff0073dd3e43b7403d9ccd8e.tar.gz llvm-280ecf6ff0858e31ff0073dd3e43b7403d9ccd8e.tar.bz2 |
Revert "Refactor and cleanup Assembly Parsing / Lexing"
Reverting for unexpected crashes on various platforms.
This reverts commit r272953.
llvm-svn: 272957
Diffstat (limited to 'llvm/lib/MC/MCParser/AsmLexer.cpp')
-rw-r--r-- | llvm/lib/MC/MCParser/AsmLexer.cpp | 137 |
1 files changed, 60 insertions, 77 deletions
diff --git a/llvm/lib/MC/MCParser/AsmLexer.cpp b/llvm/lib/MC/MCParser/AsmLexer.cpp index d56071a..f653304 100644 --- a/llvm/lib/MC/MCParser/AsmLexer.cpp +++ b/llvm/lib/MC/MCParser/AsmLexer.cpp @@ -23,8 +23,7 @@ using namespace llvm; AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) { CurPtr = nullptr; - IsAtStartOfLine = true; - IsAtStartOfStatement = true; + isAtStartOfLine = true; AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@"); } @@ -51,9 +50,20 @@ AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) { } int AsmLexer::getNextChar() { - if (CurPtr == CurBuf.end()) + char CurChar = *CurPtr++; + switch (CurChar) { + default: + return (unsigned char)CurChar; + case 0: + // A nul character in the stream is either the end of the current buffer or + // a random nul in the file. Disambiguate that here. + if (CurPtr - 1 != CurBuf.end()) + return 0; // Just whitespace. + + // Otherwise, return end of file. + --CurPtr; // Another call to lex will return EOF again. return EOF; - return (unsigned char)*CurPtr++; + } } /// LexFloatLiteral: [0-9]*[.][0-9]*([eE][+-]?[0-9]*)? @@ -159,52 +169,43 @@ AsmToken AsmLexer::LexIdentifier() { AsmToken AsmLexer::LexSlash() { switch (*CurPtr) { case '*': - IsAtStartOfStatement = false; break; // C style comment. case '/': ++CurPtr; return LexLineComment(); default: - IsAtStartOfStatement = false; - return AsmToken(AsmToken::Slash, StringRef(TokStart, 1)); + return AsmToken(AsmToken::Slash, StringRef(CurPtr - 1, 1)); } // C Style comment. ++CurPtr; // skip the star. - while (CurPtr != CurBuf.end()) { - switch (*CurPtr++) { + while (1) { + int CurChar = getNextChar(); + switch (CurChar) { + case EOF: + return ReturnError(TokStart, "unterminated comment"); case '*': // End of the comment? - if (*CurPtr != '/') - break; + if (CurPtr[0] != '/') break; + ++CurPtr; // End the */. - return AsmToken(AsmToken::Comment, - StringRef(TokStart, CurPtr - TokStart)); + return LexToken(); } } - return ReturnError(TokStart, "unterminated comment"); } /// LexLineComment: Comment: #[^\n]* /// : //[^\n]* AsmToken AsmLexer::LexLineComment() { - // Mark This as an end of statement with a body of the - // comment. While it would be nicer to leave this two tokens, - // backwards compatability with TargetParsers makes keeping this in this form - // better. + // FIXME: This is broken if we happen to a comment at the end of a file, which + // was .included, and which doesn't end with a newline. int CurChar = getNextChar(); while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF) CurChar = getNextChar(); - IsAtStartOfLine = true; - // Whis is a whole line comment. leave newline - if (IsAtStartOfStatement) - return AsmToken(AsmToken::EndOfStatement, - StringRef(TokStart, CurPtr - TokStart)); - IsAtStartOfStatement = true; - - return AsmToken(AsmToken::EndOfStatement, - StringRef(TokStart, CurPtr - 1 - TokStart)); + if (CurChar == EOF) + return AsmToken(AsmToken::Eof, StringRef(TokStart, 0)); + return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 0)); } static void SkipIgnoredIntegerSuffix(const char *&CurPtr) { @@ -422,7 +423,8 @@ StringRef AsmLexer::LexUntilEndOfStatement() { while (!isAtStartOfComment(CurPtr) && // Start of line comment. !isAtStatementSeparator(CurPtr) && // End of statement marker. - *CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) { + *CurPtr != '\n' && *CurPtr != '\r' && + (*CurPtr != 0 || CurPtr != CurBuf.end())) { ++CurPtr; } return StringRef(TokStart, CurPtr-TokStart); @@ -431,7 +433,8 @@ StringRef AsmLexer::LexUntilEndOfStatement() { StringRef AsmLexer::LexUntilEndOfLine() { TokStart = CurPtr; - while (*CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) { + while (*CurPtr != '\n' && *CurPtr != '\r' && + (*CurPtr != 0 || CurPtr != CurBuf.end())) { ++CurPtr; } return StringRef(TokStart, CurPtr-TokStart); @@ -441,8 +444,7 @@ size_t AsmLexer::peekTokens(MutableArrayRef<AsmToken> Buf, bool ShouldSkipSpace) { const char *SavedTokStart = TokStart; const char *SavedCurPtr = CurPtr; - bool SavedAtStartOfLine = IsAtStartOfLine; - bool SavedAtStartOfStatement = IsAtStartOfStatement; + bool SavedAtStartOfLine = isAtStartOfLine; bool SavedSkipSpace = SkipSpace; std::string SavedErr = getErr(); @@ -463,8 +465,7 @@ size_t AsmLexer::peekTokens(MutableArrayRef<AsmToken> Buf, SetError(SavedErrLoc, SavedErr); SkipSpace = SavedSkipSpace; - IsAtStartOfLine = SavedAtStartOfLine; - IsAtStartOfStatement = SavedAtStartOfStatement; + isAtStartOfLine = SavedAtStartOfLine; CurPtr = SavedCurPtr; TokStart = SavedTokStart; @@ -494,45 +495,29 @@ AsmToken AsmLexer::LexToken() { // This always consumes at least one character. int CurChar = getNextChar(); - if (CurChar == '#' && IsAtStartOfStatement) { - // If this starts with a '#', this may be a cpp - // hash directive and otherwise a line comment. - AsmToken TokenBuf[2]; - MutableArrayRef<AsmToken> Buf(TokenBuf, 2); - size_t num = peekTokens(Buf, true); - // There cannot be a space preceeding this - if (IsAtStartOfLine && num == 2 && TokenBuf[0].is(AsmToken::Integer) && - TokenBuf[1].is(AsmToken::String)) { - CurPtr = TokStart; // reset curPtr; - StringRef s = LexUntilEndOfLine(); - UnLex(TokenBuf[1]); - UnLex(TokenBuf[0]); - return AsmToken(AsmToken::HashDirective, s); - } + if (isAtStartOfComment(TokStart)) { + // If this comment starts with a '#', then return the Hash token and let + // the assembler parser see if it can be parsed as a cpp line filename + // comment. We do this only if we are at the start of a line. + if (CurChar == '#' && isAtStartOfLine) + return AsmToken(AsmToken::Hash, StringRef(TokStart, 1)); + isAtStartOfLine = true; return LexLineComment(); } - - if (isAtStartOfComment(TokStart)) - return LexLineComment(); - if (isAtStatementSeparator(TokStart)) { CurPtr += strlen(MAI.getSeparatorString()) - 1; - IsAtStartOfLine = true; - IsAtStartOfStatement = true; return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, strlen(MAI.getSeparatorString()))); } // If we're missing a newline at EOF, make sure we still get an // EndOfStatement token before the Eof token. - if (CurChar == EOF && !IsAtStartOfStatement) { - IsAtStartOfLine = true; - IsAtStartOfStatement = true; + if (CurChar == EOF && !isAtStartOfLine) { + isAtStartOfLine = true; return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); } - IsAtStartOfLine = false; - bool OldIsAtStartOfStatement = IsAtStartOfStatement; - IsAtStartOfStatement = false; + + isAtStartOfLine = false; switch (CurChar) { default: // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]* @@ -541,24 +526,24 @@ AsmToken AsmLexer::LexToken() { // Unknown character, emit an error. return ReturnError(TokStart, "invalid character in input"); - case EOF: - IsAtStartOfLine = true; - IsAtStartOfStatement = true; - return AsmToken(AsmToken::Eof, StringRef(TokStart, 0)); + case EOF: return AsmToken(AsmToken::Eof, StringRef(TokStart, 0)); case 0: case ' ': case '\t': - IsAtStartOfStatement = OldIsAtStartOfStatement; - while (*CurPtr == ' ' || *CurPtr == '\t') - CurPtr++; - if (SkipSpace) - return LexToken(); // Ignore whitespace. - else - return AsmToken(AsmToken::Space, StringRef(TokStart, CurPtr - TokStart)); - case '\n': + if (SkipSpace) { + // Ignore whitespace. + return LexToken(); + } else { + int len = 1; + while (*CurPtr==' ' || *CurPtr=='\t') { + CurPtr++; + len++; + } + return AsmToken(AsmToken::Space, StringRef(TokStart, len)); + } + case '\n': // FALL THROUGH. case '\r': - IsAtStartOfLine = true; - IsAtStartOfStatement = true; + isAtStartOfLine = true; return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1)); case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1)); @@ -601,9 +586,7 @@ AsmToken AsmLexer::LexToken() { } return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1)); case '%': return AsmToken(AsmToken::Percent, StringRef(TokStart, 1)); - case '/': - IsAtStartOfStatement = OldIsAtStartOfStatement; - return LexSlash(); + case '/': return LexSlash(); case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1)); case '\'': return LexSingleQuote(); case '"': return LexQuote(); |