Diffstat (limited to 'llvm/lib/Target/AMDGPU')
7 files changed, 97 insertions, 74 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 723d07e..c7a91f4c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -929,7 +929,7 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
                                       ThinOrFullLTOPhase Phase) {
         if (Level != OptimizationLevel::O0) {
           if (!isLTOPreLink(Phase)) {
-            if (getTargetTriple().isAMDGCN()) {
+            if (EnableAMDGPUAttributor && getTargetTriple().isAMDGCN()) {
               AMDGPUAttributorOptions Opts;
               MPM.addPass(AMDGPUAttributorPass(*this, Opts, Phase));
             }
@@ -966,7 +966,7 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
           PM.addPass(InternalizePass(mustPreserveGV));
           PM.addPass(GlobalDCEPass());
         }
-        if (EnableAMDGPUAttributor) {
+        if (EnableAMDGPUAttributor && getTargetTriple().isAMDGCN()) {
           AMDGPUAttributorOptions Opt;
           if (HasClosedWorldAssumption)
             Opt.IsClosedWorld = true;
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index a8140c3..99ba043 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -2105,6 +2105,10 @@ bool AMDGPUOperand::isInlinableImm(MVT type) const {
     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
     return false;
   }
+
+  if (getModifiers().Lit != LitModifier::None)
+    return false;
+
   // TODO: We should avoid using host float here. It would be better to
   // check the float bit values which is what a few other places do.
   // We've had bot failures before due to weird NaN support on mips hosts.
@@ -2339,6 +2343,7 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
   bool CanUse64BitLiterals =
       AsmParser->has64BitLiterals() &&
       !(InstDesc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P));
+  LitModifier Lit = getModifiers().Lit;
   MCContext &Ctx = AsmParser->getContext();

   if (Imm.IsFPImm) { // We got fp literal token
@@ -2348,7 +2353,8 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
-      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
+      if (Lit == LitModifier::None &&
+          AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                        AsmParser->hasInv2PiInlineImm())) {
         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
         return;
@@ -2372,14 +2378,20 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo

        if ((OpTy == AMDGPU::OPERAND_REG_IMM_FP64 ||
             OpTy == AMDGPU::OPERAND_REG_INLINE_C_FP64 ||
-            OpTy == AMDGPU::OPERAND_REG_INLINE_AC_FP64) &&
-           CanUse64BitLiterals && Lo_32(Val) != 0) {
-          Inst.addOperand(MCOperand::createExpr(
-              AMDGPUMCExpr::createLit(LitModifier::Lit64, Val, Ctx)));
-        } else {
-          Inst.addOperand(MCOperand::createImm(Val));
+            OpTy == AMDGPU::OPERAND_REG_INLINE_AC_FP64)) {
+          if (CanUse64BitLiterals && Lit == LitModifier::None &&
+              (isInt<32>(Val) || isUInt<32>(Val))) {
+            // The floating-point operand will be verbalized as an
+            // integer one. If that integer happens to fit 32 bits, on
+            // re-assembling it will be intepreted as the high half of
+            // the actual value, so we have to wrap it into lit64().
+            Lit = LitModifier::Lit64;
+          } else if (Lit == LitModifier::Lit) {
+            // For FP64 operands lit() specifies the high half of the value.
+            Val = Hi_32(Val);
+          }
         }
-        return;
+        break;
       }

       // We don't allow fp literals in 64-bit integer instructions. It is
@@ -2388,19 +2400,17 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
       llvm_unreachable("fp literal in 64-bit integer instruction.");

     case AMDGPU::OPERAND_KIMM64:
-      if (CanUse64BitLiterals && Lo_32(Val) != 0) {
-        Inst.addOperand(MCOperand::createExpr(
-            AMDGPUMCExpr::createLit(LitModifier::Lit64, Val, Ctx)));
-      } else {
-        Inst.addOperand(MCOperand::createImm(Val));
-      }
-      return;
+      if (CanUse64BitLiterals && Lit == LitModifier::None &&
+          (isInt<32>(Val) || isUInt<32>(Val)))
+        Lit = LitModifier::Lit64;
+      break;

     case AMDGPU::OPERAND_REG_IMM_BF16:
     case AMDGPU::OPERAND_REG_INLINE_C_BF16:
     case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
     case AMDGPU::OPERAND_REG_IMM_V2BF16:
-      if (AsmParser->hasInv2PiInlineImm() && Literal == 0x3fc45f306725feed) {
+      if (Lit == LitModifier::None && AsmParser->hasInv2PiInlineImm() &&
+          Literal == 0x3fc45f306725feed) {
         // This is the 1/(2*pi) which is going to be truncated to bf16 with the
         // loss of precision. The constant represents ideomatic fp32 value of
         // 1/(2*pi) = 0.15915494 since bf16 is in fact fp32 with cleared low 16
@@ -2438,14 +2448,19 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo

       // We allow precision lost but not overflow or underflow. This should be
       // checked earlier in isLiteralImm()
-      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
-      Inst.addOperand(MCOperand::createImm(ImmVal));
-      return;
+      Val = FPLiteral.bitcastToAPInt().getZExtValue();
+      break;
     }
     default:
       llvm_unreachable("invalid operand size");
     }

+    if (Lit != LitModifier::None) {
+      Inst.addOperand(
+          MCOperand::createExpr(AMDGPUMCExpr::createLit(Lit, Val, Ctx)));
+    } else {
+      Inst.addOperand(MCOperand::createImm(Val));
+    }
     return;
   }

@@ -2465,12 +2480,12 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
   case AMDGPU::OPERAND_REG_IMM_V2INT32:
   case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
   case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
-    Inst.addOperand(MCOperand::createImm(Val));
-    return;
+    break;

   case AMDGPU::OPERAND_REG_IMM_INT64:
   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
-    if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
+    if (Lit == LitModifier::None &&
+        AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
       Inst.addOperand(MCOperand::createImm(Val));
       return;
     }
@@ -2479,22 +2494,15 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
     // truncated to uint32_t), if the target doesn't support 64-bit literals, or
     // the lit modifier is explicitly used, we need to truncate it to the 32
     // LSBs.
-    if (!AsmParser->has64BitLiterals() ||
-        getModifiers().Lit == LitModifier::Lit)
+    if (!AsmParser->has64BitLiterals() || Lit == LitModifier::Lit)
       Val = Lo_32(Val);
-
-    if (CanUse64BitLiterals && (!isInt<32>(Val) || !isUInt<32>(Val))) {
-      Inst.addOperand(MCOperand::createExpr(
-          AMDGPUMCExpr::createLit(LitModifier::Lit64, Val, Ctx)));
-    } else {
-      Inst.addOperand(MCOperand::createImm(Val));
-    }
-    return;
+    break;

   case AMDGPU::OPERAND_REG_IMM_FP64:
   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
-    if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
+    if (Lit == LitModifier::None &&
+        AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
       Inst.addOperand(MCOperand::createImm(Val));
       return;
     }
@@ -2509,19 +2517,15 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
       // 1) explicitly forced by using lit modifier;
      // 2) the value is a valid 32-bit representation (signed or unsigned),
      // meanwhile not forced by lit64 modifier.
-      if (getModifiers().Lit == LitModifier::Lit ||
-          (getModifiers().Lit != LitModifier::Lit64 &&
-           (isInt<32>(Val) || isUInt<32>(Val))))
+      if (Lit == LitModifier::Lit ||
+          (Lit != LitModifier::Lit64 && (isInt<32>(Val) || isUInt<32>(Val))))
        Val = static_cast<uint64_t>(Val) << 32;
     }

-    if (CanUse64BitLiterals && Lo_32(Val) != 0) {
-      Inst.addOperand(MCOperand::createExpr(
-          AMDGPUMCExpr::createLit(LitModifier::Lit64, Val, Ctx)));
-    } else {
-      Inst.addOperand(MCOperand::createImm(Val));
-    }
-    return;
+    // For FP64 operands lit() specifies the high half of the value.
+    if (Lit == LitModifier::Lit)
+      Val = Hi_32(Val);
+    break;

   case AMDGPU::OPERAND_REG_IMM_INT16:
   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
@@ -2534,25 +2538,23 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
   case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
   case AMDGPU::OPERAND_KIMM32:
   case AMDGPU::OPERAND_KIMM16:
-    Inst.addOperand(MCOperand::createImm(Val));
-    return;
+    break;

   case AMDGPU::OPERAND_KIMM64:
-    if ((isInt<32>(Val) || isUInt<32>(Val)) &&
-        getModifiers().Lit != LitModifier::Lit64)
+    if ((isInt<32>(Val) || isUInt<32>(Val)) && Lit != LitModifier::Lit64)
       Val <<= 32;
-
-    if (CanUse64BitLiterals && Lo_32(Val) != 0) {
-      Inst.addOperand(MCOperand::createExpr(
-          AMDGPUMCExpr::createLit(LitModifier::Lit64, Val, Ctx)));
-    } else {
-      Inst.addOperand(MCOperand::createImm(Val));
-    }
-    return;
+    break;

   default:
     llvm_unreachable("invalid operand type");
   }
+
+  if (Lit != LitModifier::None) {
+    Inst.addOperand(
+        MCOperand::createExpr(AMDGPUMCExpr::createLit(Lit, Val, Ctx)));
+  } else {
+    Inst.addOperand(MCOperand::createImm(Val));
+  }
 }

 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
@@ -4821,12 +4823,15 @@ bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst,
     const MCOperand &MO = Inst.getOperand(OpIdx);
     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
+      bool IsLit = false;
       std::optional<int64_t> Imm;
       if (MO.isImm()) {
         Imm = MO.getImm();
       } else if (MO.isExpr()) {
-        if (isLitExpr(MO.getExpr()))
+        if (isLitExpr(MO.getExpr())) {
+          IsLit = true;
           Imm = getLitValue(MO.getExpr());
+        }
       } else {
         continue;
       }
@@ -4836,7 +4841,7 @@ bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst,
       } else if (!isInlineConstant(Inst, OpIdx)) {
         auto OpType = static_cast<AMDGPU::OperandType>(
             Desc.operands()[OpIdx].OperandType);
-        int64_t Value = encode32BitLiteral(*Imm, OpType);
+        int64_t Value = encode32BitLiteral(*Imm, OpType, IsLit);
         if (NumLiterals == 0 || LiteralValue != Value) {
           LiteralValue = Value;
           ++NumLiterals;
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index f11b373..be62395 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -1551,7 +1551,7 @@ AMDGPUDisassembler::decodeMandatoryLiteral64Constant(uint64_t Val) const {
   HasLiteral = true;
   Literal = Literal64 = Val;

-  bool UseLit64 = Lo_32(Literal64) != 0;
+  bool UseLit64 = Hi_32(Literal64) == 0;
   return UseLit64 ? MCOperand::createExpr(AMDGPUMCExpr::createLit(
                         LitModifier::Lit64, Literal64, getContext()))
                   : MCOperand::createImm(Literal64);
@@ -1584,11 +1584,11 @@ MCOperand AMDGPUDisassembler::decodeLiteralConstant(const MCInstrDesc &Desc,
   if (CanUse64BitLiterals) {
     if (OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_INT64 ||
         OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT64)
-      UseLit64 = !isInt<32>(Val) || !isUInt<32>(Val);
+      UseLit64 = false;
     else if (OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_FP64 ||
              OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP64 ||
              OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_AC_FP64)
-      UseLit64 = Lo_32(Val) != 0;
+      UseLit64 = Hi_32(Literal64) == 0;
   }

   return UseLit64 ? MCOperand::createExpr(AMDGPUMCExpr::createLit(
@@ -1614,12 +1614,12 @@ AMDGPUDisassembler::decodeLiteral64Constant(const MCInst &Inst) const {
   const MCOperandInfo &OpDesc = Desc.operands()[Inst.getNumOperands()];
   if (OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_INT64 ||
       OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT64) {
-    UseLit64 = !isInt<32>(Literal64) || !isUInt<32>(Literal64);
+    UseLit64 = false;
   } else {
     assert(OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_FP64 ||
            OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP64 ||
            OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_AC_FP64);
-    UseLit64 = Lo_32(Literal64) != 0;
+    UseLit64 = Hi_32(Literal64) == 0;
   }

   return UseLit64 ? MCOperand::createExpr(AMDGPUMCExpr::createLit(
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index e82f998..703ec0a 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -73,7 +73,13 @@ void AMDGPUInstPrinter::printU16ImmDecOperand(const MCInst *MI, unsigned OpNo,
 void AMDGPUInstPrinter::printU32ImmOperand(const MCInst *MI, unsigned OpNo,
                                            const MCSubtargetInfo &STI,
                                            raw_ostream &O) {
-  O << formatHex(MI->getOperand(OpNo).getImm() & 0xffffffff);
+  const MCOperand &Op = MI->getOperand(OpNo);
+  if (Op.isExpr()) {
+    MAI.printExpr(O, *Op.getExpr());
+    return;
+  }
+
+  O << formatHex(Op.getImm() & 0xffffffff);
 }

 void AMDGPUInstPrinter::printFP64ImmOperand(const MCInst *MI, unsigned OpNo,
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
index f2879116..ea758bb 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
@@ -270,10 +270,19 @@ std::optional<uint64_t> AMDGPUMCCodeEmitter::getLitEncoding(
     const MCInstrDesc &Desc, const MCOperand &MO, unsigned OpNo,
     const MCSubtargetInfo &STI, bool HasMandatoryLiteral) const {
   const MCOperandInfo &OpInfo = Desc.operands()[OpNo];
-  int64_t Imm;
+  int64_t Imm = 0;
   if (MO.isExpr()) {
-    if (!MO.getExpr()->evaluateAsAbsolute(Imm))
-      return AMDGPU::getOperandSize(OpInfo) == 8 ? 254 : 255;
+    if (!MO.getExpr()->evaluateAsAbsolute(Imm) ||
+        AMDGPU::isLitExpr(MO.getExpr())) {
+      if (OpInfo.OperandType == AMDGPU::OPERAND_KIMM16 ||
+          OpInfo.OperandType == AMDGPU::OPERAND_KIMM32 ||
+          OpInfo.OperandType == AMDGPU::OPERAND_KIMM64)
+        return Imm;
+      if (STI.hasFeature(AMDGPU::Feature64BitLiterals) &&
+          AMDGPU::getOperandSize(OpInfo) == 8)
+        return 254;
+      return 255;
+    }
   } else {
     assert(!MO.isDFPImm());
@@ -452,13 +461,16 @@ void AMDGPUMCCodeEmitter::encodeInstruction(const MCInst &MI,

     // Yes! Encode it
     int64_t Imm = 0;
+    bool IsLit = false;
     if (Op.isImm())
       Imm = Op.getImm();
     else if (Op.isExpr()) {
-      if (const auto *C = dyn_cast<MCConstantExpr>(Op.getExpr()))
+      if (const auto *C = dyn_cast<MCConstantExpr>(Op.getExpr())) {
         Imm = C->getValue();
-      else if (AMDGPU::isLitExpr(Op.getExpr()))
+      } else if (AMDGPU::isLitExpr(Op.getExpr())) {
+        IsLit = true;
         Imm = AMDGPU::getLitValue(Op.getExpr());
+      }
     } else // Exprs will be replaced with a fixup value.
       llvm_unreachable("Must be immediate or expr");

@@ -468,7 +480,7 @@ void AMDGPUMCCodeEmitter::encodeInstruction(const MCInst &MI,
     } else {
       auto OpType =
           static_cast<AMDGPU::OperandType>(Desc.operands()[i].OperandType);
-      Imm = AMDGPU::encode32BitLiteral(Imm, OpType);
+      Imm = AMDGPU::encode32BitLiteral(Imm, OpType, IsLit);
       support::endian::write<uint32_t>(CB, Imm, llvm::endianness::little);
     }
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 76023d2..3e1b058 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -3145,7 +3145,7 @@ bool isValid32BitLiteral(uint64_t Val, bool IsFP64) {
   return isUInt<32>(Val) || isInt<32>(Val);
 }

-int64_t encode32BitLiteral(int64_t Imm, OperandType Type) {
+int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit) {
   switch (Type) {
   default:
     break;
@@ -3168,7 +3168,7 @@ int64_t encode32BitLiteral(int64_t Imm, OperandType Type) {
   case OPERAND_REG_INLINE_C_INT32:
     return Lo_32(Imm);
   case OPERAND_REG_IMM_FP64:
-    return Hi_32(Imm);
+    return IsLit ? Imm : Hi_32(Imm);
   }
   return Imm;
 }
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 49b4d02..a01a5fd 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1727,7 +1727,7 @@ LLVM_READNONE
 bool isValid32BitLiteral(uint64_t Val, bool IsFP64);

 LLVM_READNONE
-int64_t encode32BitLiteral(int64_t Imm, OperandType Type);
+int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit);

 bool isArgPassedInSGPR(const Argument *Arg);