diff options
author | Rahul Joshi <rjoshi@nvidia.com> | 2024-10-07 14:02:28 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-10-07 14:02:28 -0700 |
commit | 3bace7efe00cbc5caadd2926027f4675845f06a8 (patch) | |
tree | 23dd45a491cf77999a09e3dfbf842ab4ca4707e6 | |
parent | d2457e6d8f62a12b3b74791cfd3f5808168c8a71 (diff) | |
download | llvm-3bace7efe00cbc5caadd2926027f4675845f06a8.zip llvm-3bace7efe00cbc5caadd2926027f4675845f06a8.tar.gz llvm-3bace7efe00cbc5caadd2926027f4675845f06a8.tar.bz2 |
[LLVM][AsmParser] Make error reporting of lexer errors more precise (#111077)
When the lexer hits an error during assembly parsing, it just logs the
error in the `ErrorMsg` object, and it's possible that error gets
overwritten later in by the parser with a more generic error message.
This makes some errors reported by the LLVM's asm parser less precise.
Address this by not having the parser overwrite the message logged by
the lexer by assigning error messages generated by the lexer higher
"priority" than those generated by parser and overwriting the error
message only if its same or higher priority.
Update several Assembler unit test to now check the more precise error
messaged reported by the LLVM's AsmParser.
-rw-r--r-- | llvm/include/llvm/AsmParser/LLLexer.h | 34 | ||||
-rw-r--r-- | llvm/include/llvm/AsmParser/LLParser.h | 6 | ||||
-rw-r--r-- | llvm/lib/AsmParser/LLLexer.cpp | 44 | ||||
-rw-r--r-- | llvm/lib/AsmParser/LLParser.cpp | 16 | ||||
-rw-r--r-- | llvm/test/Assembler/allockind-missing.ll | 4 | ||||
-rw-r--r-- | llvm/test/Assembler/invalid-inttype.ll | 4 | ||||
-rw-r--r-- | llvm/test/Assembler/invalid-landingpad.ll | 4 | ||||
-rw-r--r-- | llvm/test/Assembler/invalid-name.ll | bin | 142 -> 207 bytes | |||
-rw-r--r-- | llvm/test/Assembler/invalid-name2.ll | bin | 120 -> 185 bytes |
9 files changed, 75 insertions, 37 deletions
diff --git a/llvm/include/llvm/AsmParser/LLLexer.h b/llvm/include/llvm/AsmParser/LLLexer.h index bd929db..a9f51fb 100644 --- a/llvm/include/llvm/AsmParser/LLLexer.h +++ b/llvm/include/llvm/AsmParser/LLLexer.h @@ -28,7 +28,20 @@ namespace llvm { class LLLexer { const char *CurPtr; StringRef CurBuf; - SMDiagnostic &ErrorInfo; + + enum class ErrorPriority { + None, // No error message present. + Parser, // Errors issued by parser. + Lexer, // Errors issued by lexer. + }; + + struct ErrorInfo { + ErrorPriority Priority = ErrorPriority::None; + SMDiagnostic &Error; + + explicit ErrorInfo(SMDiagnostic &Error) : Error(Error) {} + } ErrorInfo; + SourceMgr &SM; LLVMContext &Context; @@ -66,8 +79,13 @@ namespace llvm { IgnoreColonInIdentifiers = val; } - bool Error(LocTy ErrorLoc, const Twine &Msg) const; - bool Error(const Twine &Msg) const { return Error(getLoc(), Msg); } + // This returns true as a convenience for the parser functions that return + // true on error. + bool ParseError(LocTy ErrorLoc, const Twine &Msg) { + Error(ErrorLoc, Msg, ErrorPriority::Parser); + return true; + } + bool ParseError(const Twine &Msg) { return ParseError(getLoc(), Msg); } void Warning(LocTy WarningLoc, const Twine &Msg) const; void Warning(const Twine &Msg) const { return Warning(getLoc(), Msg); } @@ -97,7 +115,15 @@ namespace llvm { uint64_t atoull(const char *Buffer, const char *End); uint64_t HexIntToVal(const char *Buffer, const char *End); void HexToIntPair(const char *Buffer, const char *End, uint64_t Pair[2]); - void FP80HexToIntPair(const char *Buffer, const char *End, uint64_t Pair[2]); + void FP80HexToIntPair(const char *Buffer, const char *End, + uint64_t Pair[2]); + + void Error(LocTy ErrorLoc, const Twine &Msg, ErrorPriority Origin); + + void LexError(LocTy ErrorLoc, const Twine &Msg) { + Error(ErrorLoc, Msg, ErrorPriority::Lexer); + } + void LexError(const Twine &Msg) { LexError(getLoc(), Msg); } }; } // end namespace llvm diff --git a/llvm/include/llvm/AsmParser/LLParser.h b/llvm/include/llvm/AsmParser/LLParser.h index 9576b93..1ef8b8f 100644 --- a/llvm/include/llvm/AsmParser/LLParser.h +++ b/llvm/include/llvm/AsmParser/LLParser.h @@ -207,11 +207,11 @@ namespace llvm { LLVMContext &getContext() { return Context; } private: - bool error(LocTy L, const Twine &Msg) const { return Lex.Error(L, Msg); } - bool tokError(const Twine &Msg) const { return error(Lex.getLoc(), Msg); } + bool error(LocTy L, const Twine &Msg) { return Lex.ParseError(L, Msg); } + bool tokError(const Twine &Msg) { return error(Lex.getLoc(), Msg); } bool checkValueID(LocTy L, StringRef Kind, StringRef Prefix, - unsigned NextID, unsigned ID) const; + unsigned NextID, unsigned ID); /// Restore the internal name and slot mappings using the mappings that /// were created at an earlier parsing stage. diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp index a3e47da..f71cbe1 100644 --- a/llvm/lib/AsmParser/LLLexer.cpp +++ b/llvm/lib/AsmParser/LLLexer.cpp @@ -25,9 +25,21 @@ using namespace llvm; -bool LLLexer::Error(LocTy ErrorLoc, const Twine &Msg) const { - ErrorInfo = SM.GetMessage(ErrorLoc, SourceMgr::DK_Error, Msg); - return true; +// Both the lexer and parser can issue error messages. If the lexer issues a +// lexer error, since we do not terminate execution immediately, usually that +// is followed by the parser issuing a parser error. However, the error issued +// by the lexer is more relevant in that case as opposed to potentially more +// generic parser error. So instead of always recording the last error message +// use the `Priority` to establish a priority, with Lexer > Parser > None. We +// record the issued message only if the message has same or higher priority +// than the existing one. This prevents lexer errors from being overwritten by +// parser errors. +void LLLexer::Error(LocTy ErrorLoc, const Twine &Msg, + LLLexer::ErrorPriority Priority) { + if (Priority < ErrorInfo.Priority) + return; + ErrorInfo.Error = SM.GetMessage(ErrorLoc, SourceMgr::DK_Error, Msg); + ErrorInfo.Priority = Priority; } void LLLexer::Warning(LocTy WarningLoc, const Twine &Msg) const { @@ -49,7 +61,7 @@ uint64_t LLLexer::atoull(const char *Buffer, const char *End) { Result *= 10; Result += *Buffer-'0'; if (Result < OldRes) { // Uh, oh, overflow detected!!! - Error("constant bigger than 64 bits detected!"); + LexError("constant bigger than 64 bits detected!"); return 0; } } @@ -64,7 +76,7 @@ uint64_t LLLexer::HexIntToVal(const char *Buffer, const char *End) { Result += hexDigitValue(*Buffer); if (Result < OldRes) { // Uh, oh, overflow detected!!! - Error("constant bigger than 64 bits detected!"); + LexError("constant bigger than 64 bits detected!"); return 0; } } @@ -87,7 +99,7 @@ void LLLexer::HexToIntPair(const char *Buffer, const char *End, Pair[1] += hexDigitValue(*Buffer); } if (Buffer != End) - Error("constant bigger than 128 bits detected!"); + LexError("constant bigger than 128 bits detected!"); } /// FP80HexToIntPair - translate an 80 bit FP80 number (20 hexits) into @@ -106,7 +118,7 @@ void LLLexer::FP80HexToIntPair(const char *Buffer, const char *End, Pair[0] += hexDigitValue(*Buffer); } if (Buffer != End) - Error("constant bigger than 128 bits detected!"); + LexError("constant bigger than 128 bits detected!"); } // UnEscapeLexed - Run through the specified buffer and change \xx codes to the @@ -273,14 +285,14 @@ lltok::Kind LLLexer::LexDollar() { int CurChar = getNextChar(); if (CurChar == EOF) { - Error("end of file in COMDAT variable name"); + LexError("end of file in COMDAT variable name"); return lltok::Error; } if (CurChar == '"') { StrVal.assign(TokStart + 2, CurPtr - 1); UnEscapeLexed(StrVal); if (StringRef(StrVal).contains(0)) { - Error("Null bytes are not allowed in names"); + LexError("Null bytes are not allowed in names"); return lltok::Error; } return lltok::ComdatVar; @@ -302,7 +314,7 @@ lltok::Kind LLLexer::ReadString(lltok::Kind kind) { int CurChar = getNextChar(); if (CurChar == EOF) { - Error("end of file in string constant"); + LexError("end of file in string constant"); return lltok::Error; } if (CurChar == '"') { @@ -342,7 +354,7 @@ lltok::Kind LLLexer::LexUIntID(lltok::Kind Token) { uint64_t Val = atoull(TokStart + 1, CurPtr); if ((unsigned)Val != Val) - Error("invalid value number (too large)!"); + LexError("invalid value number (too large)!"); UIntVal = unsigned(Val); return Token; } @@ -356,14 +368,14 @@ lltok::Kind LLLexer::LexVar(lltok::Kind Var, lltok::Kind VarID) { int CurChar = getNextChar(); if (CurChar == EOF) { - Error("end of file in global variable name"); + LexError("end of file in global variable name"); return lltok::Error; } if (CurChar == '"') { StrVal.assign(TokStart+2, CurPtr-1); UnEscapeLexed(StrVal); if (StringRef(StrVal).contains(0)) { - Error("Null bytes are not allowed in names"); + LexError("Null bytes are not allowed in names"); return lltok::Error; } return Var; @@ -398,7 +410,7 @@ lltok::Kind LLLexer::LexQuote() { if (CurPtr[0] == ':') { ++CurPtr; if (StringRef(StrVal).contains(0)) { - Error("Null bytes are not allowed in names"); + LexError("Null bytes are not allowed in names"); kind = lltok::Error; } else { kind = lltok::LabelStr; @@ -480,7 +492,7 @@ lltok::Kind LLLexer::LexIdentifier() { uint64_t NumBits = atoull(StartChar, CurPtr); if (NumBits < IntegerType::MIN_INT_BITS || NumBits > IntegerType::MAX_INT_BITS) { - Error("bitwidth for integer type out of range!"); + LexError("bitwidth for integer type out of range!"); return lltok::Error; } TyVal = IntegerType::get(Context, NumBits); @@ -1109,7 +1121,7 @@ lltok::Kind LLLexer::LexDigitOrNegative() { uint64_t Val = atoull(TokStart, CurPtr); ++CurPtr; // Skip the colon. if ((unsigned)Val != Val) - Error("invalid value number (too large)!"); + LexError("invalid value number (too large)!"); UIntVal = unsigned(Val); return lltok::LabelID; } diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index d84521d..6450d8b 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -3220,7 +3220,7 @@ bool LLParser::parseOptionalOperandBundles( } bool LLParser::checkValueID(LocTy Loc, StringRef Kind, StringRef Prefix, - unsigned NextID, unsigned ID) const { + unsigned NextID, unsigned ID) { if (ID < NextID) return error(Loc, Kind + " expected to be numbered '" + Prefix + Twine(NextID) + "' or greater"); @@ -5192,7 +5192,7 @@ bool LLParser::parseDILocation(MDNode *&Result, bool IsDistinct) { /// ::= distinct !DIAssignID() bool LLParser::parseDIAssignID(MDNode *&Result, bool IsDistinct) { if (!IsDistinct) - return Lex.Error("missing 'distinct', required for !DIAssignID()"); + return tokError("missing 'distinct', required for !DIAssignID()"); Lex.Lex(); @@ -5500,7 +5500,7 @@ bool LLParser::parseDIFile(MDNode *&Result, bool IsDistinct) { if (checksumkind.Seen && checksum.Seen) OptChecksum.emplace(checksumkind.Val, checksum.Val); else if (checksumkind.Seen || checksum.Seen) - return Lex.Error("'checksumkind' and 'checksum' must be provided together"); + return tokError("'checksumkind' and 'checksum' must be provided together"); MDString *Source = nullptr; if (source.Seen) @@ -5519,7 +5519,7 @@ bool LLParser::parseDIFile(MDNode *&Result, bool IsDistinct) { /// sysroot: "/", sdk: "MacOSX.sdk") bool LLParser::parseDICompileUnit(MDNode *&Result, bool IsDistinct) { if (!IsDistinct) - return Lex.Error("missing 'distinct', required for !DICompileUnit"); + return tokError("missing 'distinct', required for !DICompileUnit"); #define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \ REQUIRED(language, DwarfLangField, ); \ @@ -5599,7 +5599,7 @@ bool LLParser::parseDISubprogram(MDNode *&Result, bool IsDistinct) { : DISubprogram::toSPFlags(isLocal.Val, isDefinition.Val, isOptimized.Val, virtuality.Val); if ((SPFlags & DISubprogram::SPFlagDefinition) && !IsDistinct) - return Lex.Error( + return error( Loc, "missing 'distinct', required for !DISubprogram that is a Definition"); Result = GET_OR_DISTINCT( @@ -7952,10 +7952,10 @@ bool LLParser::parseLandingPad(Instruction *&Inst, PerFunctionState &PFS) { // array constant. if (CT == LandingPadInst::Catch) { if (isa<ArrayType>(V->getType())) - error(VLoc, "'catch' clause has an invalid type"); + return error(VLoc, "'catch' clause has an invalid type"); } else { if (!isa<ArrayType>(V->getType())) - error(VLoc, "'filter' clause has an invalid type"); + return error(VLoc, "'filter' clause has an invalid type"); } Constant *CV = dyn_cast<Constant>(V); @@ -8639,7 +8639,7 @@ bool LLParser::parseUseListOrderIndexes(SmallVectorImpl<unsigned> &Indexes) { if (parseToken(lltok::lbrace, "expected '{' here")) return true; if (Lex.getKind() == lltok::rbrace) - return Lex.Error("expected non-empty list of uselistorder indexes"); + return tokError("expected non-empty list of uselistorder indexes"); // Use Offset, Max, and IsOrdered to check consistency of indexes. The // indexes should be distinct numbers in the range [0, size-1], and should diff --git a/llvm/test/Assembler/allockind-missing.ll b/llvm/test/Assembler/allockind-missing.ll index e8672fe..4fb3f65 100644 --- a/llvm/test/Assembler/allockind-missing.ll +++ b/llvm/test/Assembler/allockind-missing.ll @@ -1,4 +1,4 @@ -; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s +; RUN: not llvm-as --disable-output %s 2>&1 | FileCheck -DFILE=%s %s +; CHECK: [[FILE]]:[[@LINE+1]]:30: error: expected allockind value declare void @f0() allockind() -; CHECK: :[[#@LINE-1]]:30: error: expected allockind value diff --git a/llvm/test/Assembler/invalid-inttype.ll b/llvm/test/Assembler/invalid-inttype.ll index df31755..c8aa7c6 100644 --- a/llvm/test/Assembler/invalid-inttype.ll +++ b/llvm/test/Assembler/invalid-inttype.ll @@ -1,5 +1,5 @@ -; RUN: not llvm-as < %s 2>&1 | FileCheck %s +; RUN: not llvm-as --disable-output %s 2>&1 | FileCheck -DFILE=%s %s ; i8388609 is the smallest integer type that can't be represented in LLVM IR +; CHECK: [[FILE]]:[[@LINE+1]]:21: error: bitwidth for integer type out of range! @i2 = common global i8388609 0, align 4 -; CHECK: expected type diff --git a/llvm/test/Assembler/invalid-landingpad.ll b/llvm/test/Assembler/invalid-landingpad.ll index 306e943..805d3db 100644 --- a/llvm/test/Assembler/invalid-landingpad.ll +++ b/llvm/test/Assembler/invalid-landingpad.ll @@ -1,7 +1,7 @@ -; RUN: not llvm-as < %s 2>&1 | FileCheck %s +; RUN: not llvm-as --disable-output %s 2>&1 | FileCheck -DFILE=%s %s -; CHECK: clause argument must be a constant define void @test(i32 %in) personality ptr null { +; CHECK: [[FILE]]:[[@LINE+1]]:24: error: 'filter' clause has an invalid type landingpad {} filter i32 %in } diff --git a/llvm/test/Assembler/invalid-name.ll b/llvm/test/Assembler/invalid-name.ll Binary files differindex 0681ea5..74133e6 100644 --- a/llvm/test/Assembler/invalid-name.ll +++ b/llvm/test/Assembler/invalid-name.ll diff --git a/llvm/test/Assembler/invalid-name2.ll b/llvm/test/Assembler/invalid-name2.ll Binary files differindex 384dee6..8a84879 100644 --- a/llvm/test/Assembler/invalid-name2.ll +++ b/llvm/test/Assembler/invalid-name2.ll |