[LLVM][AsmParser] Make error reporting of lexer errors more precise (#111077)

When the lexer hits an error during assembly parsing, it just logs the error in the `ErrorMsg` object, and it's possible that error gets overwritten later in by the parser with a more generic error message. This makes some errors reported by the LLVM's asm parser less precise. Address this by not having the parser overwrite the message logged by the lexer by assigning error messages generated by the lexer higher "priority" than those generated by parser and overwriting the error message only if its same or higher priority. Update several Assembler unit test to now check the more precise error messaged reported by the LLVM's AsmParser.
author: Rahul Joshi <rjoshi@nvidia.com> 2024-10-07 14:02:28 -0700
committer: GitHub <noreply@github.com> 2024-10-07 14:02:28 -0700
commit: 3bace7efe00cbc5caadd2926027f4675845f06a8 (patch)
tree: 23dd45a491cf77999a09e3dfbf842ab4ca4707e6 /llvm/lib/AsmParser/LLLexer.cpp
parent: d2457e6d8f62a12b3b74791cfd3f5808168c8a71 (diff)
download: llvm-3bace7efe00cbc5caadd2926027f4675845f06a8.zip
llvm-3bace7efe00cbc5caadd2926027f4675845f06a8.tar.gz
llvm-3bace7efe00cbc5caadd2926027f4675845f06a8.tar.bz2
1 files changed, 28 insertions, 16 deletions
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
index a3e47da..f71cbe1 100644
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -25,9 +25,21 @@
 
 using namespace llvm;
 
-bool LLLexer::Error(LocTy ErrorLoc, const Twine &Msg) const {
-  ErrorInfo = SM.GetMessage(ErrorLoc, SourceMgr::DK_Error, Msg);
-  return true;
+// Both the lexer and parser can issue error messages. If the lexer issues a
+// lexer error, since we do not terminate execution immediately, usually that
+// is followed by the parser issuing a parser error. However, the error issued
+// by the lexer is more relevant in that case as opposed to potentially more
+// generic parser error. So instead of always recording the last error message
+// use the `Priority` to establish a priority, with Lexer > Parser > None. We
+// record the issued message only if the message has same or higher priority
+// than the existing one. This prevents lexer errors from being overwritten by
+// parser errors.
+void LLLexer::Error(LocTy ErrorLoc, const Twine &Msg,
+                    LLLexer::ErrorPriority Priority) {
+  if (Priority < ErrorInfo.Priority)
+    return;
+  ErrorInfo.Error = SM.GetMessage(ErrorLoc, SourceMgr::DK_Error, Msg);
+  ErrorInfo.Priority = Priority;
 }
 
 void LLLexer::Warning(LocTy WarningLoc, const Twine &Msg) const {
@@ -49,7 +61,7 @@ uint64_t LLLexer::atoull(const char *Buffer, const char *End) {
     Result *= 10;
     Result += *Buffer-'0';
     if (Result < OldRes) {  // Uh, oh, overflow detected!!!
-      Error("constant bigger than 64 bits detected!");
+      LexError("constant bigger than 64 bits detected!");
       return 0;
     }
   }
@@ -64,7 +76,7 @@ uint64_t LLLexer::HexIntToVal(const char *Buffer, const char *End) {
     Result += hexDigitValue(*Buffer);
 
     if (Result < OldRes) {   // Uh, oh, overflow detected!!!
-      Error("constant bigger than 64 bits detected!");
+      LexError("constant bigger than 64 bits detected!");
       return 0;
     }
   }
@@ -87,7 +99,7 @@ void LLLexer::HexToIntPair(const char *Buffer, const char *End,
     Pair[1] += hexDigitValue(*Buffer);
   }
   if (Buffer != End)
-    Error("constant bigger than 128 bits detected!");
+    LexError("constant bigger than 128 bits detected!");
 }
 
 /// FP80HexToIntPair - translate an 80 bit FP80 number (20 hexits) into
@@ -106,7 +118,7 @@ void LLLexer::FP80HexToIntPair(const char *Buffer, const char *End,
     Pair[0] += hexDigitValue(*Buffer);
   }
   if (Buffer != End)
-    Error("constant bigger than 128 bits detected!");
+    LexError("constant bigger than 128 bits detected!");
 }
 
 // UnEscapeLexed - Run through the specified buffer and change \xx codes to the
@@ -273,14 +285,14 @@ lltok::Kind LLLexer::LexDollar() {
       int CurChar = getNextChar();
 
       if (CurChar == EOF) {
-        Error("end of file in COMDAT variable name");
+        LexError("end of file in COMDAT variable name");
         return lltok::Error;
       }
       if (CurChar == '"') {
         StrVal.assign(TokStart + 2, CurPtr - 1);
         UnEscapeLexed(StrVal);
         if (StringRef(StrVal).contains(0)) {
-          Error("Null bytes are not allowed in names");
+          LexError("Null bytes are not allowed in names");
           return lltok::Error;
         }
         return lltok::ComdatVar;
@@ -302,7 +314,7 @@ lltok::Kind LLLexer::ReadString(lltok::Kind kind) {
     int CurChar = getNextChar();
 
     if (CurChar == EOF) {
-      Error("end of file in string constant");
+      LexError("end of file in string constant");
       return lltok::Error;
     }
     if (CurChar == '"') {
@@ -342,7 +354,7 @@ lltok::Kind LLLexer::LexUIntID(lltok::Kind Token) {
 
   uint64_t Val = atoull(TokStart + 1, CurPtr);
   if ((unsigned)Val != Val)
-    Error("invalid value number (too large)!");
+    LexError("invalid value number (too large)!");
   UIntVal = unsigned(Val);
   return Token;
 }
@@ -356,14 +368,14 @@ lltok::Kind LLLexer::LexVar(lltok::Kind Var, lltok::Kind VarID) {
       int CurChar = getNextChar();
 
       if (CurChar == EOF) {
-        Error("end of file in global variable name");
+        LexError("end of file in global variable name");
         return lltok::Error;
       }
       if (CurChar == '"') {
         StrVal.assign(TokStart+2, CurPtr-1);
         UnEscapeLexed(StrVal);
         if (StringRef(StrVal).contains(0)) {
-          Error("Null bytes are not allowed in names");
+          LexError("Null bytes are not allowed in names");
           return lltok::Error;
         }
         return Var;
@@ -398,7 +410,7 @@ lltok::Kind LLLexer::LexQuote() {
   if (CurPtr[0] == ':') {
     ++CurPtr;
     if (StringRef(StrVal).contains(0)) {
-      Error("Null bytes are not allowed in names");
+      LexError("Null bytes are not allowed in names");
       kind = lltok::Error;
     } else {
       kind = lltok::LabelStr;
@@ -480,7 +492,7 @@ lltok::Kind LLLexer::LexIdentifier() {
     uint64_t NumBits = atoull(StartChar, CurPtr);
     if (NumBits < IntegerType::MIN_INT_BITS ||
         NumBits > IntegerType::MAX_INT_BITS) {
-      Error("bitwidth for integer type out of range!");
+      LexError("bitwidth for integer type out of range!");
       return lltok::Error;
     }
     TyVal = IntegerType::get(Context, NumBits);
@@ -1109,7 +1121,7 @@ lltok::Kind LLLexer::LexDigitOrNegative() {
     uint64_t Val = atoull(TokStart, CurPtr);
     ++CurPtr; // Skip the colon.
     if ((unsigned)Val != Val)
-      Error("invalid value number (too large)!");
+      LexError("invalid value number (too large)!");
     UIntVal = unsigned(Val);
     return lltok::LabelID;
   }
author	Rahul Joshi <rjoshi@nvidia.com>	2024-10-07 14:02:28 -0700
committer	GitHub <noreply@github.com>	2024-10-07 14:02:28 -0700
commit	3bace7efe00cbc5caadd2926027f4675845f06a8 (patch)
tree	23dd45a491cf77999a09e3dfbf842ab4ca4707e6 /llvm/lib/AsmParser/LLLexer.cpp
parent	d2457e6d8f62a12b3b74791cfd3f5808168c8a71 (diff)
download	llvm-3bace7efe00cbc5caadd2926027f4675845f06a8.zip llvm-3bace7efe00cbc5caadd2926027f4675845f06a8.tar.gz llvm-3bace7efe00cbc5caadd2926027f4675845f06a8.tar.bz2