[LLVM][AsmParser] Make error reporting of lexer errors more precise (#111077)

When the lexer hits an error during assembly parsing, it just logs the error in the `ErrorMsg` object, and it's possible that error gets overwritten later in by the parser with a more generic error message. This makes some errors reported by the LLVM's asm parser less precise. Address this by not having the parser overwrite the message logged by the lexer by assigning error messages generated by the lexer higher "priority" than those generated by parser and overwriting the error message only if its same or higher priority. Update several Assembler unit test to now check the more precise error messaged reported by the LLVM's AsmParser.
author: Rahul Joshi <rjoshi@nvidia.com> 2024-10-07 14:02:28 -0700
committer: GitHub <noreply@github.com> 2024-10-07 14:02:28 -0700
commit: 3bace7efe00cbc5caadd2926027f4675845f06a8 (patch)
tree: 23dd45a491cf77999a09e3dfbf842ab4ca4707e6
parent: d2457e6d8f62a12b3b74791cfd3f5808168c8a71 (diff)
download: llvm-3bace7efe00cbc5caadd2926027f4675845f06a8.zip
llvm-3bace7efe00cbc5caadd2926027f4675845f06a8.tar.gz
llvm-3bace7efe00cbc5caadd2926027f4675845f06a8.tar.bz2
9 files changed, 75 insertions, 37 deletions
diff --git a/llvm/include/llvm/AsmParser/LLLexer.h b/llvm/include/llvm/AsmParser/LLLexer.h
index bd929db..a9f51fb 100644
--- a/llvm/include/llvm/AsmParser/LLLexer.h
+++ b/llvm/include/llvm/AsmParser/LLLexer.h
@@ -28,7 +28,20 @@ namespace llvm {
   class LLLexer {
     const char *CurPtr;
     StringRef CurBuf;
-    SMDiagnostic &ErrorInfo;
+
+    enum class ErrorPriority {
+      None,   // No error message present.
+      Parser, // Errors issued by parser.
+      Lexer,  // Errors issued by lexer.
+    };
+
+    struct ErrorInfo {
+      ErrorPriority Priority = ErrorPriority::None;
+      SMDiagnostic &Error;
+
+      explicit ErrorInfo(SMDiagnostic &Error) : Error(Error) {}
+    } ErrorInfo;
+
     SourceMgr &SM;
     LLVMContext &Context;
 
@@ -66,8 +79,13 @@ namespace llvm {
       IgnoreColonInIdentifiers = val;
     }
 
-    bool Error(LocTy ErrorLoc, const Twine &Msg) const;
-    bool Error(const Twine &Msg) const { return Error(getLoc(), Msg); }
+    // This returns true as a convenience for the parser functions that return
+    // true on error.
+    bool ParseError(LocTy ErrorLoc, const Twine &Msg) {
+      Error(ErrorLoc, Msg, ErrorPriority::Parser);
+      return true;
+    }
+    bool ParseError(const Twine &Msg) { return ParseError(getLoc(), Msg); }
 
     void Warning(LocTy WarningLoc, const Twine &Msg) const;
     void Warning(const Twine &Msg) const { return Warning(getLoc(), Msg); }
@@ -97,7 +115,15 @@ namespace llvm {
     uint64_t atoull(const char *Buffer, const char *End);
     uint64_t HexIntToVal(const char *Buffer, const char *End);
     void HexToIntPair(const char *Buffer, const char *End, uint64_t Pair[2]);
-    void FP80HexToIntPair(const char *Buffer, const char *End, uint64_t Pair[2]);
+    void FP80HexToIntPair(const char *Buffer, const char *End,
+                          uint64_t Pair[2]);
+
+    void Error(LocTy ErrorLoc, const Twine &Msg, ErrorPriority Origin);
+
+    void LexError(LocTy ErrorLoc, const Twine &Msg) {
+      Error(ErrorLoc, Msg, ErrorPriority::Lexer);
+    }
+    void LexError(const Twine &Msg) { LexError(getLoc(), Msg); }
   };
 } // end namespace llvm
 
diff --git a/llvm/include/llvm/AsmParser/LLParser.h b/llvm/include/llvm/AsmParser/LLParser.h
index 9576b93..1ef8b8f 100644
--- a/llvm/include/llvm/AsmParser/LLParser.h
+++ b/llvm/include/llvm/AsmParser/LLParser.h
@@ -207,11 +207,11 @@ namespace llvm {
     LLVMContext &getContext() { return Context; }
 
   private:
-    bool error(LocTy L, const Twine &Msg) const { return Lex.Error(L, Msg); }
-    bool tokError(const Twine &Msg) const { return error(Lex.getLoc(), Msg); }
+    bool error(LocTy L, const Twine &Msg) { return Lex.ParseError(L, Msg); }
+    bool tokError(const Twine &Msg) { return error(Lex.getLoc(), Msg); }
 
     bool checkValueID(LocTy L, StringRef Kind, StringRef Prefix,
-                      unsigned NextID, unsigned ID) const;
+                      unsigned NextID, unsigned ID);
 
     /// Restore the internal name and slot mappings using the mappings that
     /// were created at an earlier parsing stage.
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
index a3e47da..f71cbe1 100644
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -25,9 +25,21 @@
 
 using namespace llvm;
 
-bool LLLexer::Error(LocTy ErrorLoc, const Twine &Msg) const {
-  ErrorInfo = SM.GetMessage(ErrorLoc, SourceMgr::DK_Error, Msg);
-  return true;
+// Both the lexer and parser can issue error messages. If the lexer issues a
+// lexer error, since we do not terminate execution immediately, usually that
+// is followed by the parser issuing a parser error. However, the error issued
+// by the lexer is more relevant in that case as opposed to potentially more
+// generic parser error. So instead of always recording the last error message
+// use the `Priority` to establish a priority, with Lexer > Parser > None. We
+// record the issued message only if the message has same or higher priority
+// than the existing one. This prevents lexer errors from being overwritten by
+// parser errors.
+void LLLexer::Error(LocTy ErrorLoc, const Twine &Msg,
+                    LLLexer::ErrorPriority Priority) {
+  if (Priority < ErrorInfo.Priority)
+    return;
+  ErrorInfo.Error = SM.GetMessage(ErrorLoc, SourceMgr::DK_Error, Msg);
+  ErrorInfo.Priority = Priority;
 }
 
 void LLLexer::Warning(LocTy WarningLoc, const Twine &Msg) const {
@@ -49,7 +61,7 @@ uint64_t LLLexer::atoull(const char *Buffer, const char *End) {
     Result *= 10;
     Result += *Buffer-'0';
     if (Result < OldRes) {  // Uh, oh, overflow detected!!!
-      Error("constant bigger than 64 bits detected!");
+      LexError("constant bigger than 64 bits detected!");
       return 0;
     }
   }
@@ -64,7 +76,7 @@ uint64_t LLLexer::HexIntToVal(const char *Buffer, const char *End) {
     Result += hexDigitValue(*Buffer);
 
     if (Result < OldRes) {   // Uh, oh, overflow detected!!!
-      Error("constant bigger than 64 bits detected!");
+      LexError("constant bigger than 64 bits detected!");
       return 0;
     }
   }
@@ -87,7 +99,7 @@ void LLLexer::HexToIntPair(const char *Buffer, const char *End,
     Pair[1] += hexDigitValue(*Buffer);
   }
   if (Buffer != End)
-    Error("constant bigger than 128 bits detected!");
+    LexError("constant bigger than 128 bits detected!");
 }
 
 /// FP80HexToIntPair - translate an 80 bit FP80 number (20 hexits) into
@@ -106,7 +118,7 @@ void LLLexer::FP80HexToIntPair(const char *Buffer, const char *End,
     Pair[0] += hexDigitValue(*Buffer);
   }
   if (Buffer != End)
-    Error("constant bigger than 128 bits detected!");
+    LexError("constant bigger than 128 bits detected!");
 }
 
 // UnEscapeLexed - Run through the specified buffer and change \xx codes to the
@@ -273,14 +285,14 @@ lltok::Kind LLLexer::LexDollar() {
       int CurChar = getNextChar();
 
       if (CurChar == EOF) {
-        Error("end of file in COMDAT variable name");
+        LexError("end of file in COMDAT variable name");
         return lltok::Error;
       }
       if (CurChar == '"') {
         StrVal.assign(TokStart + 2, CurPtr - 1);
         UnEscapeLexed(StrVal);
         if (StringRef(StrVal).contains(0)) {
-          Error("Null bytes are not allowed in names");
+          LexError("Null bytes are not allowed in names");
           return lltok::Error;
         }
         return lltok::ComdatVar;
@@ -302,7 +314,7 @@ lltok::Kind LLLexer::ReadString(lltok::Kind kind) {
     int CurChar = getNextChar();
 
     if (CurChar == EOF) {
-      Error("end of file in string constant");
+      LexError("end of file in string constant");
       return lltok::Error;
     }
     if (CurChar == '"') {
@@ -342,7 +354,7 @@ lltok::Kind LLLexer::LexUIntID(lltok::Kind Token) {
 
   uint64_t Val = atoull(TokStart + 1, CurPtr);
   if ((unsigned)Val != Val)
-    Error("invalid value number (too large)!");
+    LexError("invalid value number (too large)!");
   UIntVal = unsigned(Val);
   return Token;
 }
@@ -356,14 +368,14 @@ lltok::Kind LLLexer::LexVar(lltok::Kind Var, lltok::Kind VarID) {
       int CurChar = getNextChar();
 
       if (CurChar == EOF) {
-        Error("end of file in global variable name");
+        LexError("end of file in global variable name");
         return lltok::Error;
       }
       if (CurChar == '"') {
         StrVal.assign(TokStart+2, CurPtr-1);
         UnEscapeLexed(StrVal);
         if (StringRef(StrVal).contains(0)) {
-          Error("Null bytes are not allowed in names");
+          LexError("Null bytes are not allowed in names");
           return lltok::Error;
         }
         return Var;
@@ -398,7 +410,7 @@ lltok::Kind LLLexer::LexQuote() {
   if (CurPtr[0] == ':') {
     ++CurPtr;
     if (StringRef(StrVal).contains(0)) {
-      Error("Null bytes are not allowed in names");
+      LexError("Null bytes are not allowed in names");
       kind = lltok::Error;
     } else {
       kind = lltok::LabelStr;
@@ -480,7 +492,7 @@ lltok::Kind LLLexer::LexIdentifier() {
     uint64_t NumBits = atoull(StartChar, CurPtr);
     if (NumBits < IntegerType::MIN_INT_BITS ||
         NumBits > IntegerType::MAX_INT_BITS) {
-      Error("bitwidth for integer type out of range!");
+      LexError("bitwidth for integer type out of range!");
       return lltok::Error;
     }
     TyVal = IntegerType::get(Context, NumBits);
@@ -1109,7 +1121,7 @@ lltok::Kind LLLexer::LexDigitOrNegative() {
     uint64_t Val = atoull(TokStart, CurPtr);
     ++CurPtr; // Skip the colon.
     if ((unsigned)Val != Val)
-      Error("invalid value number (too large)!");
+      LexError("invalid value number (too large)!");
     UIntVal = unsigned(Val);
     return lltok::LabelID;
   }
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index d84521d..6450d8b 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -3220,7 +3220,7 @@ bool LLParser::parseOptionalOperandBundles(
 }
 
 bool LLParser::checkValueID(LocTy Loc, StringRef Kind, StringRef Prefix,
-                            unsigned NextID, unsigned ID) const {
+                            unsigned NextID, unsigned ID) {
   if (ID < NextID)
     return error(Loc, Kind + " expected to be numbered '" + Prefix +
                           Twine(NextID) + "' or greater");
@@ -5192,7 +5192,7 @@ bool LLParser::parseDILocation(MDNode *&Result, bool IsDistinct) {
 ///   ::= distinct !DIAssignID()
 bool LLParser::parseDIAssignID(MDNode *&Result, bool IsDistinct) {
   if (!IsDistinct)
-    return Lex.Error("missing 'distinct', required for !DIAssignID()");
+    return tokError("missing 'distinct', required for !DIAssignID()");
 
   Lex.Lex();
 
@@ -5500,7 +5500,7 @@ bool LLParser::parseDIFile(MDNode *&Result, bool IsDistinct) {
   if (checksumkind.Seen && checksum.Seen)
     OptChecksum.emplace(checksumkind.Val, checksum.Val);
   else if (checksumkind.Seen || checksum.Seen)
-    return Lex.Error("'checksumkind' and 'checksum' must be provided together");
+    return tokError("'checksumkind' and 'checksum' must be provided together");
 
   MDString *Source = nullptr;
   if (source.Seen)
@@ -5519,7 +5519,7 @@ bool LLParser::parseDIFile(MDNode *&Result, bool IsDistinct) {
 ///                      sysroot: "/", sdk: "MacOSX.sdk")
 bool LLParser::parseDICompileUnit(MDNode *&Result, bool IsDistinct) {
   if (!IsDistinct)
-    return Lex.Error("missing 'distinct', required for !DICompileUnit");
+    return tokError("missing 'distinct', required for !DICompileUnit");
 
 #define VISIT_MD_FIELDS(OPTIONAL, REQUIRED)                                    \
   REQUIRED(language, DwarfLangField, );                                        \
@@ -5599,7 +5599,7 @@ bool LLParser::parseDISubprogram(MDNode *&Result, bool IsDistinct) {
                    : DISubprogram::toSPFlags(isLocal.Val, isDefinition.Val,
                                              isOptimized.Val, virtuality.Val);
   if ((SPFlags & DISubprogram::SPFlagDefinition) && !IsDistinct)
-    return Lex.Error(
+    return error(
         Loc,
         "missing 'distinct', required for !DISubprogram that is a Definition");
   Result = GET_OR_DISTINCT(
@@ -7952,10 +7952,10 @@ bool LLParser::parseLandingPad(Instruction *&Inst, PerFunctionState &PFS) {
     // array constant.
     if (CT == LandingPadInst::Catch) {
       if (isa<ArrayType>(V->getType()))
-        error(VLoc, "'catch' clause has an invalid type");
+        return error(VLoc, "'catch' clause has an invalid type");
     } else {
       if (!isa<ArrayType>(V->getType()))
-        error(VLoc, "'filter' clause has an invalid type");
+        return error(VLoc, "'filter' clause has an invalid type");
     }
 
     Constant *CV = dyn_cast<Constant>(V);
@@ -8639,7 +8639,7 @@ bool LLParser::parseUseListOrderIndexes(SmallVectorImpl<unsigned> &Indexes) {
   if (parseToken(lltok::lbrace, "expected '{' here"))
     return true;
   if (Lex.getKind() == lltok::rbrace)
-    return Lex.Error("expected non-empty list of uselistorder indexes");
+    return tokError("expected non-empty list of uselistorder indexes");
 
   // Use Offset, Max, and IsOrdered to check consistency of indexes.  The
   // indexes should be distinct numbers in the range [0, size-1], and should
diff --git a/llvm/test/Assembler/allockind-missing.ll b/llvm/test/Assembler/allockind-missing.ll
index e8672fe..4fb3f65 100644
--- a/llvm/test/Assembler/allockind-missing.ll
+++ b/llvm/test/Assembler/allockind-missing.ll
@@ -1,4 +1,4 @@
-; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+; RUN: not llvm-as --disable-output %s  2>&1 | FileCheck -DFILE=%s %s
 
+; CHECK: [[FILE]]:[[@LINE+1]]:30: error: expected allockind value
 declare void @f0() allockind()
-; CHECK: :[[#@LINE-1]]:30: error: expected allockind value
diff --git a/llvm/test/Assembler/invalid-inttype.ll b/llvm/test/Assembler/invalid-inttype.ll
index df31755..c8aa7c6 100644
--- a/llvm/test/Assembler/invalid-inttype.ll
+++ b/llvm/test/Assembler/invalid-inttype.ll
@@ -1,5 +1,5 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+; RUN: not llvm-as --disable-output %s 2>&1 | FileCheck -DFILE=%s %s
 
 ; i8388609 is the smallest integer type that can't be represented in LLVM IR
+; CHECK: [[FILE]]:[[@LINE+1]]:21: error: bitwidth for integer type out of range!
 @i2 = common global i8388609 0, align 4
-; CHECK: expected type
diff --git a/llvm/test/Assembler/invalid-landingpad.ll b/llvm/test/Assembler/invalid-landingpad.ll
index 306e943..805d3db 100644
--- a/llvm/test/Assembler/invalid-landingpad.ll
+++ b/llvm/test/Assembler/invalid-landingpad.ll
@@ -1,7 +1,7 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+; RUN: not llvm-as --disable-output %s 2>&1 | FileCheck -DFILE=%s %s
 
-; CHECK: clause argument must be a constant
 
 define void @test(i32 %in) personality ptr null {
+; CHECK: [[FILE]]:[[@LINE+1]]:24: error: 'filter' clause has an invalid type
   landingpad {} filter i32 %in
 }
diff --git a/llvm/test/Assembler/invalid-name.ll b/llvm/test/Assembler/invalid-name.ll
index 0681ea5..74133e6 100644
--- a/llvm/test/Assembler/invalid-name.ll
+++ b/llvm/test/Assembler/invalid-name.ll
diff --git a/llvm/test/Assembler/invalid-name2.ll b/llvm/test/Assembler/invalid-name2.ll
index 384dee6..8a84879 100644
--- a/llvm/test/Assembler/invalid-name2.ll
+++ b/llvm/test/Assembler/invalid-name2.ll
author	Rahul Joshi <rjoshi@nvidia.com>	2024-10-07 14:02:28 -0700
committer	GitHub <noreply@github.com>	2024-10-07 14:02:28 -0700
commit	3bace7efe00cbc5caadd2926027f4675845f06a8 (patch)
tree	23dd45a491cf77999a09e3dfbf842ab4ca4707e6
parent	d2457e6d8f62a12b3b74791cfd3f5808168c8a71 (diff)
download	llvm-3bace7efe00cbc5caadd2926027f4675845f06a8.zip llvm-3bace7efe00cbc5caadd2926027f4675845f06a8.tar.gz llvm-3bace7efe00cbc5caadd2926027f4675845f06a8.tar.bz2