Diffstat (limited to 'llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp')
-rw-r--r--  llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 149
1 file changed, 81 insertions(+), 68 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index d0c0822..99ba043 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1347,6 +1347,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
bool ForcedDPP = false;
bool ForcedSDWA = false;
KernelScopeInfo KernelScope;
+ const unsigned HwMode;
/// @name Auto-generated Match Functions
/// {
@@ -1356,6 +1357,13 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
/// }
+ /// Get size of register operand
+ unsigned getRegOperandSize(const MCInstrDesc &Desc, unsigned OpNo) const {
+ assert(OpNo < Desc.NumOperands);
+ int16_t RCID = MII.getOpRegClassID(Desc.operands()[OpNo], HwMode);
+ return getRegBitWidth(RCID) / 8;
+ }
+
private:
void createConstantSymbol(StringRef Id, int64_t Val);
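For context, a standalone sketch of what the new helper computes, assuming a hypothetical width table in place of MII.getOpRegClassID() and getRegBitWidth(): the operand's register class is resolved for the active HwMode and its bit width converted to bytes.

#include <cassert>
#include <cstdint>

// Hypothetical width table indexed by register-class ID; the real parser
// asks getRegBitWidth(RCID) after resolving RCID for the current HwMode.
static unsigned getRegBitWidthSketch(int16_t RCID) {
  static const unsigned Widths[] = {32, 64, 96, 128, 160, 192, 224, 256};
  assert(RCID >= 0 && RCID < 8 && "unknown register class");
  return Widths[RCID];
}

// Mirrors the helper's final step: bits to bytes.
static unsigned getRegOperandSizeSketch(int16_t RCID) {
  return getRegBitWidthSketch(RCID) / 8;
}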
@@ -1442,9 +1450,9 @@ public:
using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
- const MCInstrInfo &MII,
- const MCTargetOptions &Options)
- : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
+ const MCInstrInfo &MII, const MCTargetOptions &Options)
+ : MCTargetAsmParser(Options, STI, MII), Parser(_Parser),
+ HwMode(STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo)) {
MCAsmParserExtension::Initialize(Parser);
setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
@@ -2097,6 +2105,10 @@ bool AMDGPUOperand::isInlinableImm(MVT type) const {
// Only plain immediates are inlinable (e.g. "clamp" attribute is not)
return false;
}
+
+ if (getModifiers().Lit != LitModifier::None)
+ return false;
+
// TODO: We should avoid using host float here. It would be better to
// check the float bit values which is what a few other places do.
// We've had bot failures before due to weird NaN support on mips hosts.
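A minimal sketch of the rule this hunk adds, under assumed simplified types: an explicit lit()/lit64() modifier opts a value out of inline-constant encoding even when the value itself is inlinable.

enum class LitModifier { None, Lit, Lit64 };

// Hypothetical stand-in for the inline-immediate test: AMDGPU's integer
// inline range is -16..64; an explicit modifier forces a literal anyway.
static bool isInlinableImmSketch(long long Val, LitModifier Lit) {
  if (Lit != LitModifier::None)
    return false; // lit()/lit64() requested an encoded literal
  return Val >= -16 && Val <= 64;
}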
@@ -2331,6 +2343,7 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
bool CanUse64BitLiterals =
AsmParser->has64BitLiterals() &&
!(InstDesc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P));
+ LitModifier Lit = getModifiers().Lit;
MCContext &Ctx = AsmParser->getContext();
if (Imm.IsFPImm) { // We got fp literal token
@@ -2340,7 +2353,8 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
case AMDGPU::OPERAND_REG_INLINE_C_INT64:
case AMDGPU::OPERAND_REG_INLINE_C_FP64:
case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
- if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
+ if (Lit == LitModifier::None &&
+ AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
AsmParser->hasInv2PiInlineImm())) {
Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
return;
@@ -2364,14 +2378,20 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
if ((OpTy == AMDGPU::OPERAND_REG_IMM_FP64 ||
OpTy == AMDGPU::OPERAND_REG_INLINE_C_FP64 ||
- OpTy == AMDGPU::OPERAND_REG_INLINE_AC_FP64) &&
- CanUse64BitLiterals && Lo_32(Val) != 0) {
- Inst.addOperand(MCOperand::createExpr(
- AMDGPUMCExpr::createLit(LitModifier::Lit64, Val, Ctx)));
- } else {
- Inst.addOperand(MCOperand::createImm(Val));
+ OpTy == AMDGPU::OPERAND_REG_INLINE_AC_FP64)) {
+ if (CanUse64BitLiterals && Lit == LitModifier::None &&
+ (isInt<32>(Val) || isUInt<32>(Val))) {
+ // The floating-point operand will be verbalized as an
+ // integer one. If that integer happens to fit in 32 bits, on
+ // re-assembly it will be interpreted as the high half of
+ // the actual value, so we have to wrap it into lit64().
+ Lit = LitModifier::Lit64;
+ } else if (Lit == LitModifier::Lit) {
+ // For FP64 operands lit() specifies the high half of the value.
+ Val = Hi_32(Val);
+ }
}
- return;
+ break;
}
// We don't allow fp literals in 64-bit integer instructions. It is
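A sketch of the round-trip hazard the comment above describes, with stand-in helpers: FP64 literals occupy the high half of the 64-bit literal slot, so a printed integer image that fits in 32 bits would be re-read as that high half unless tagged lit64().

#include <cstdint>
#include <cstdio>

static uint32_t Hi_32Sketch(uint64_t V) { return uint32_t(V >> 32); }

int main() {
  // 1.0 as an fp64 bit pattern: all information is in the high 32 bits,
  // so plain lit(0x3ff00000) can name it; rebuilding shifts it back up.
  uint64_t OneFP64 = 0x3ff0000000000000ULL;
  uint32_t Hi = Hi_32Sketch(OneFP64);
  uint64_t Rebuilt = uint64_t(Hi) << 32;
  printf("hi=0x%08x rebuilt=0x%016llx\n", Hi, (unsigned long long)Rebuilt);
  return Rebuilt == OneFP64 ? 0 : 1;
}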
@@ -2380,19 +2400,17 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
llvm_unreachable("fp literal in 64-bit integer instruction.");
case AMDGPU::OPERAND_KIMM64:
- if (CanUse64BitLiterals && Lo_32(Val) != 0) {
- Inst.addOperand(MCOperand::createExpr(
- AMDGPUMCExpr::createLit(LitModifier::Lit64, Val, Ctx)));
- } else {
- Inst.addOperand(MCOperand::createImm(Val));
- }
- return;
+ if (CanUse64BitLiterals && Lit == LitModifier::None &&
+ (isInt<32>(Val) || isUInt<32>(Val)))
+ Lit = LitModifier::Lit64;
+ break;
case AMDGPU::OPERAND_REG_IMM_BF16:
case AMDGPU::OPERAND_REG_INLINE_C_BF16:
case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
case AMDGPU::OPERAND_REG_IMM_V2BF16:
- if (AsmParser->hasInv2PiInlineImm() && Literal == 0x3fc45f306725feed) {
+ if (Lit == LitModifier::None && AsmParser->hasInv2PiInlineImm() &&
+ Literal == 0x3fc45f306725feed) {
// This is the 1/(2*pi) which is going to be truncated to bf16 with the
// loss of precision. The constant represents the idiomatic fp32 value of
// 1/(2*pi) = 0.15915494 since bf16 is in fact fp32 with cleared low 16
@@ -2430,14 +2448,19 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
// We allow precision loss but not overflow or underflow. This should be
// checked earlier in isLiteralImm()
- uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
- Inst.addOperand(MCOperand::createImm(ImmVal));
- return;
+ Val = FPLiteral.bitcastToAPInt().getZExtValue();
+ break;
}
default:
llvm_unreachable("invalid operand size");
}
+ if (Lit != LitModifier::None) {
+ Inst.addOperand(
+ MCOperand::createExpr(AMDGPUMCExpr::createLit(Lit, Val, Ctx)));
+ } else {
+ Inst.addOperand(MCOperand::createImm(Val));
+ }
return;
}
@@ -2457,12 +2480,12 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
case AMDGPU::OPERAND_REG_IMM_V2INT32:
case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
- Inst.addOperand(MCOperand::createImm(Val));
- return;
+ break;
case AMDGPU::OPERAND_REG_IMM_INT64:
case AMDGPU::OPERAND_REG_INLINE_C_INT64:
- if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
+ if (Lit == LitModifier::None &&
+ AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
Inst.addOperand(MCOperand::createImm(Val));
return;
}
@@ -2471,22 +2494,15 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
// truncated to uint32_t), if the target doesn't support 64-bit literals, or
// the lit modifier is explicitly used, we need to truncate it to the 32
// LSBs.
- if (!AsmParser->has64BitLiterals() ||
- getModifiers().Lit == LitModifier::Lit)
+ if (!AsmParser->has64BitLiterals() || Lit == LitModifier::Lit)
Val = Lo_32(Val);
-
- if (CanUse64BitLiterals && (!isInt<32>(Val) || !isUInt<32>(Val))) {
- Inst.addOperand(MCOperand::createExpr(
- AMDGPUMCExpr::createLit(LitModifier::Lit64, Val, Ctx)));
- } else {
- Inst.addOperand(MCOperand::createImm(Val));
- }
- return;
+ break;
case AMDGPU::OPERAND_REG_IMM_FP64:
case AMDGPU::OPERAND_REG_INLINE_C_FP64:
case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
- if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
+ if (Lit == LitModifier::None &&
+ AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
Inst.addOperand(MCOperand::createImm(Val));
return;
}
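A compressed sketch of the truncation rule restated in this hunk, with hypothetical flags standing in for has64BitLiterals() and an explicit lit() modifier: when only 32-bit literals can be encoded, the value is cut to its 32 LSBs.

#include <cstdint>

static int64_t truncateInt64LiteralSketch(int64_t Val, bool Has64BitLiterals,
                                          bool ForcedLit32) {
  if (!Has64BitLiterals || ForcedLit32)
    return int64_t(uint32_t(uint64_t(Val))); // keep the 32 LSBs only
  return Val;
}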
@@ -2501,19 +2517,15 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
// 1) explicitly forced by using lit modifier;
// 2) the value has a valid 32-bit representation (signed or unsigned)
// and is not forced by the lit64 modifier.
- if (getModifiers().Lit == LitModifier::Lit ||
- (getModifiers().Lit != LitModifier::Lit64 &&
- (isInt<32>(Val) || isUInt<32>(Val))))
+ if (Lit == LitModifier::Lit ||
+ (Lit != LitModifier::Lit64 && (isInt<32>(Val) || isUInt<32>(Val))))
Val = static_cast<uint64_t>(Val) << 32;
}
- if (CanUse64BitLiterals && Lo_32(Val) != 0) {
- Inst.addOperand(MCOperand::createExpr(
- AMDGPUMCExpr::createLit(LitModifier::Lit64, Val, Ctx)));
- } else {
- Inst.addOperand(MCOperand::createImm(Val));
- }
- return;
+ // For FP64 operands lit() specifies the high half of the value.
+ if (Lit == LitModifier::Lit)
+ Val = Hi_32(Val);
+ break;
case AMDGPU::OPERAND_REG_IMM_INT16:
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
@@ -2526,25 +2538,23 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
case AMDGPU::OPERAND_KIMM32:
case AMDGPU::OPERAND_KIMM16:
- Inst.addOperand(MCOperand::createImm(Val));
- return;
+ break;
case AMDGPU::OPERAND_KIMM64:
- if ((isInt<32>(Val) || isUInt<32>(Val)) &&
- getModifiers().Lit != LitModifier::Lit64)
+ if ((isInt<32>(Val) || isUInt<32>(Val)) && Lit != LitModifier::Lit64)
Val <<= 32;
-
- if (CanUse64BitLiterals && Lo_32(Val) != 0) {
- Inst.addOperand(MCOperand::createExpr(
- AMDGPUMCExpr::createLit(LitModifier::Lit64, Val, Ctx)));
- } else {
- Inst.addOperand(MCOperand::createImm(Val));
- }
- return;
+ break;
default:
llvm_unreachable("invalid operand type");
}
+
+ if (Lit != LitModifier::None) {
+ Inst.addOperand(
+ MCOperand::createExpr(AMDGPUMCExpr::createLit(Lit, Val, Ctx)));
+ } else {
+ Inst.addOperand(MCOperand::createImm(Val));
+ }
}
void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
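The restructuring above replaces per-case operand emission with breaks that fall through to one shared tail. A sketch of that tail with a textual stand-in for MCOperand::createImm and AMDGPUMCExpr::createLit (names here are illustrative only):

#include <cstdint>
#include <cstdio>
#include <string>

enum class LitModifier { None, Lit, Lit64 };

// Renders the operand the way the shared tail builds it: a bare immediate
// when no modifier is in play, otherwise a lit()/lit64() expression.
static std::string emitLiteralSketch(LitModifier Lit, uint64_t Val) {
  char Buf[32];
  snprintf(Buf, sizeof(Buf), "0x%llx", (unsigned long long)Val);
  if (Lit == LitModifier::None)
    return Buf;
  return std::string(Lit == LitModifier::Lit ? "lit(" : "lit64(") + Buf + ")";
}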
@@ -4107,7 +4117,7 @@ bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst, SMLoc IDLoc) {
if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
return true;
- unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
+ unsigned VDataSize = getRegOperandSize(Desc, VDataIdx);
unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
if (DMask == 0)
@@ -4171,8 +4181,7 @@ bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst, SMLoc IDLoc) {
const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
unsigned ActualAddrSize =
- IsNSA ? SrsrcIdx - VAddr0Idx
- : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
+ IsNSA ? SrsrcIdx - VAddr0Idx : getRegOperandSize(Desc, VAddr0Idx) / 4;
unsigned ExpectedAddrSize =
AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
@@ -4182,8 +4191,7 @@ bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst, SMLoc IDLoc) {
ExpectedAddrSize >
getNSAMaxSize(Desc.TSFlags & SIInstrFlags::VSAMPLE)) {
int VAddrLastIdx = SrsrcIdx - 1;
- unsigned VAddrLastSize =
- AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4;
+ unsigned VAddrLastSize = getRegOperandSize(Desc, VAddrLastIdx) / 4;
ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
}
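Both MIMG hunks reduce to the same size computation; a sketch with hypothetical inputs: NSA encodings carry one dword per vaddr operand, so the operand count between vaddr0 and srsrc is the address size, while a packed vaddr register contributes its size in dwords.

static unsigned actualAddrSizeSketch(bool IsNSA, int VAddr0Idx, int SrsrcIdx,
                                     unsigned VAddr0SizeBytes) {
  return IsNSA ? unsigned(SrsrcIdx - VAddr0Idx) : VAddr0SizeBytes / 4;
}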
@@ -4429,7 +4437,8 @@ bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
return true;
const MCRegisterInfo *TRI = getContext().getRegisterInfo();
- if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)
+ if (TRI->getRegClass(MII.getOpRegClassID(Desc.operands()[0], HwMode))
+ .getSizeInBits() <= 128)
return true;
if (TRI->regsOverlap(Src2Reg, DstReg)) {
@@ -4814,12 +4823,15 @@ bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst,
const MCOperand &MO = Inst.getOperand(OpIdx);
// Exclude special imm operands (like that used by s_set_gpr_idx_on)
if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
+ bool IsLit = false;
std::optional<int64_t> Imm;
if (MO.isImm()) {
Imm = MO.getImm();
} else if (MO.isExpr()) {
- if (isLitExpr(MO.getExpr()))
+ if (isLitExpr(MO.getExpr())) {
+ IsLit = true;
Imm = getLitValue(MO.getExpr());
+ }
} else {
continue;
}
@@ -4829,7 +4841,7 @@ bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst,
} else if (!isInlineConstant(Inst, OpIdx)) {
auto OpType = static_cast<AMDGPU::OperandType>(
Desc.operands()[OpIdx].OperandType);
- int64_t Value = encode32BitLiteral(*Imm, OpType);
+ int64_t Value = encode32BitLiteral(*Imm, OpType, IsLit);
if (NumLiterals == 0 || LiteralValue != Value) {
LiteralValue = Value;
++NumLiterals;
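A sketch of the literal bookkeeping this hunk threads IsLit through, with a simplified encoder standing in for encode32BitLiteral (the low-32 behavior for lit() is an assumption here): SOP instructions share a single 32-bit literal slot, so only one distinct encoded value may appear.

#include <cstdint>

// Hypothetical stand-in for encode32BitLiteral(Imm, OpType, IsLit).
static int64_t encode32BitLiteralSketch(int64_t Imm, bool IsLit) {
  return IsLit ? int64_t(uint32_t(uint64_t(Imm))) : Imm;
}

static unsigned countSOPLiteralsSketch(const int64_t *Imms, const bool *IsLit,
                                       int N) {
  unsigned NumLiterals = 0;
  int64_t LiteralValue = 0;
  for (int I = 0; I < N; ++I) {
    int64_t Value = encode32BitLiteralSketch(Imms[I], IsLit[I]);
    if (NumLiterals == 0 || LiteralValue != Value) {
      LiteralValue = Value;
      ++NumLiterals; // NumLiterals > 1 would trigger a diagnostic
    }
  }
  return NumLiterals;
}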
@@ -5000,7 +5012,7 @@ bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
if (!AMDGPU::isLegalDPALU_DPPControl(getSTI(), DppCtrl) &&
- AMDGPU::isDPALU_DPP(MII.get(Opc), getSTI())) {
+ AMDGPU::isDPALU_DPP(MII.get(Opc), MII, getSTI())) {
// DP ALU DPP is supported for row_newbcast only on GFX9* and row_share
// only on GFX12.
SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
@@ -5523,7 +5535,8 @@ bool AMDGPUAsmParser::validateWMMA(const MCInst &Inst,
unsigned Fmt = Inst.getOperand(FmtIdx).getImm();
int SrcIdx = AMDGPU::getNamedOperandIdx(Opc, SrcOp);
unsigned RegSize =
- TRI->getRegClass(Desc.operands()[SrcIdx].RegClass).getSizeInBits();
+ TRI->getRegClass(MII.getOpRegClassID(Desc.operands()[SrcIdx], HwMode))
+ .getSizeInBits();
if (RegSize == AMDGPU::wmmaScaleF8F6F4FormatToNumRegs(Fmt) * 32)
return true;
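Finally, a sketch of the WMMA width check, with an assumed format-to-register-count table in place of wmmaScaleF8F6F4FormatToNumRegs() (the mapping below is illustrative, not the real encoding): the source register's width must equal 32 bits per register implied by the format.

// Hypothetical mapping, e.g. FP8/BF8 -> 8 regs, FP6/BF6 -> 6, FP4 -> 4.
static unsigned fmtToNumRegsSketch(unsigned Fmt) {
  static const unsigned NumRegs[] = {8, 8, 6, 6, 4};
  return Fmt < 5 ? NumRegs[Fmt] : 0;
}

static bool wmmaFmtMatchesRegSizeSketch(unsigned RegSizeBits, unsigned Fmt) {
  return RegSizeBits == fmtToNumRegsSketch(Fmt) * 32;
}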