aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUCombine.td8
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp5
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp23
-rw-r--r--llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp6
-rw-r--r--llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp425
-rw-r--r--llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h19
-rw-r--r--llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp9
-rw-r--r--llvm/lib/Target/AMDGPU/SIDefines.h21
-rw-r--r--llvm/lib/Target/AMDGPU/SIFrameLowering.cpp15
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp25
-rw-r--r--llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp28
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.td19
-rw-r--r--llvm/lib/Target/AMDGPU/SIModeRegister.cpp15
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp20
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h54
-rw-r--r--llvm/lib/Target/AMDGPU/VOP1Instructions.td79
-rw-r--r--llvm/lib/Target/AMDGPU/VOP2Instructions.td54
-rw-r--r--llvm/lib/Target/AMDGPU/VOP3Instructions.td8
-rw-r--r--llvm/lib/Target/AMDGPU/VOP3PInstructions.td12
-rw-r--r--llvm/lib/Target/AMDGPU/VOPCInstructions.td500
-rw-r--r--llvm/lib/Target/AMDGPU/VOPInstructions.td17
22 files changed, 610 insertions, 754 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index b9411e2..9218760 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -33,6 +33,12 @@ def rcp_sqrt_to_rsq : GICombineRule<
[{ return matchRcpSqrtToRsq(*${rcp}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFn(*${rcp}, ${matchinfo}); }])>;
+def fdiv_by_sqrt_to_rsq_f16 : GICombineRule<
+ (defs root:$root),
+ (match (G_FSQRT f16:$sqrt, $x, (MIFlags FmContract)),
+ (G_FDIV f16:$dst, $y, $sqrt, (MIFlags FmContract)):$root,
+ [{ return matchFDivSqrtToRsqF16(*${root}); }]),
+ (apply [{ applyFDivSqrtToRsqF16(*${root}, ${x}.getReg()); }])>;
def cvt_f32_ubyteN_matchdata : GIDefMatchData<"CvtF32UByteMatchInfo">;
@@ -156,7 +162,7 @@ def AMDGPUPostLegalizerCombiner: GICombiner<
"AMDGPUPostLegalizerCombinerImpl",
[all_combines, gfx6gfx7_combines, gfx8_combines,
uchar_to_float, cvt_f32_ubyteN, remove_fcanonicalize, foldable_fneg,
- rcp_sqrt_to_rsq, sign_extension_in_reg, smulu64]> {
+ rcp_sqrt_to_rsq, fdiv_by_sqrt_to_rsq_f16, sign_extension_in_reg, smulu64]> {
let CombineAllMethodName = "tryCombineAllImpl";
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 0d3b158..13d7510 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -4824,9 +4824,8 @@ bool AMDGPULegalizerInfo::legalizeFDIV16(MachineInstr &MI,
return true;
}
-static const unsigned SPDenormModeBitField =
- AMDGPU::Hwreg::ID_MODE | (4 << AMDGPU::Hwreg::OFFSET_SHIFT_) |
- (1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_);
+static constexpr unsigned SPDenormModeBitField =
+ AMDGPU::Hwreg::HwregEncoding::encode(AMDGPU::Hwreg::ID_MODE, 4, 2);
// Enable or disable FP32 denorm mode. When 'Enable' is true, emit instructions
// to enable denorm mode. When 'Enable' is false, disable denorm mode.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
index a1c34e9..82e17dd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
@@ -83,6 +83,9 @@ public:
matchRcpSqrtToRsq(MachineInstr &MI,
std::function<void(MachineIRBuilder &)> &MatchInfo) const;
+ bool matchFDivSqrtToRsqF16(MachineInstr &MI) const;
+ void applyFDivSqrtToRsqF16(MachineInstr &MI, const Register &X) const;
+
// FIXME: Should be able to have 2 separate matchdatas rather than custom
// struct boilerplate.
struct CvtF32UByteMatchInfo {
@@ -334,6 +337,26 @@ bool AMDGPUPostLegalizerCombinerImpl::matchRcpSqrtToRsq(
return false;
}
+bool AMDGPUPostLegalizerCombinerImpl::matchFDivSqrtToRsqF16(
+ MachineInstr &MI) const {
+ Register Sqrt = MI.getOperand(2).getReg();
+ return MRI.hasOneNonDBGUse(Sqrt);
+}
+
+void AMDGPUPostLegalizerCombinerImpl::applyFDivSqrtToRsqF16(
+ MachineInstr &MI, const Register &X) const {
+ Register Dst = MI.getOperand(0).getReg();
+ Register Y = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(Dst);
+ uint32_t Flags = MI.getFlags();
+ Register RSQ = B.buildIntrinsic(Intrinsic::amdgcn_rsq, {DstTy})
+ .addUse(X)
+ .setMIFlags(Flags)
+ .getReg(0);
+ B.buildFMul(Dst, RSQ, Y, Flags);
+ MI.eraseFromParent();
+}
+
bool AMDGPUPostLegalizerCombinerImpl::matchCvtF32UByteN(
MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo) const {
Register SrcReg = MI.getOperand(1).getReg();
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 5b32b34..b7b471d 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -7272,11 +7272,11 @@ ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
if (trySkipId("hwreg", AsmToken::LParen)) {
OperandInfoTy HwReg(OPR_ID_UNKNOWN);
- OperandInfoTy Offset(OFFSET_DEFAULT_);
- OperandInfoTy Width(WIDTH_DEFAULT_);
+ OperandInfoTy Offset(HwregOffset::Default);
+ OperandInfoTy Width(HwregSize::Default);
if (parseHwregBody(HwReg, Offset, Width) &&
validateHwreg(HwReg, Offset, Width)) {
- ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
+ ImmVal = HwregEncoding::encode(HwReg.Id, Offset.Id, Width.Id);
} else {
return ParseStatus::Failure;
}
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 894607d..e1cca17 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -119,6 +119,12 @@ static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val,
return addOperand(Inst, DAsm->decodeSplitBarrier(Val));
}
+static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr,
+ const MCDisassembler *Decoder) {
+ auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+ return addOperand(Inst, DAsm->decodeDpp8FI(Val));
+}
+
#define DECODE_OPERAND(StaticDecoderName, DecoderName) \
static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm, \
uint64_t /*Addr*/, \
@@ -440,19 +446,6 @@ static inline DecoderUInt128 eat12Bytes(ArrayRef<uint8_t> &Bytes) {
return DecoderUInt128(Lo, Hi);
}
-// The disassembler is greedy, so we need to check FI operand value to
-// not parse a dpp if the correct literal is not set. For dpp16 the
-// autogenerated decoder checks the dpp literal
-static bool isValidDPP8(const MCInst &MI) {
- using namespace llvm::AMDGPU::DPP;
- int FiIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::fi);
- assert(FiIdx != -1);
- if ((unsigned)FiIdx >= MI.getNumOperands())
- return false;
- unsigned Fi = MI.getOperand(FiIdx).getImm();
- return Fi == DPP8_FI_0 || Fi == DPP8_FI_1;
-}
-
DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
ArrayRef<uint8_t> Bytes_,
uint64_t Address,
@@ -460,7 +453,10 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
Bytes = Bytes_.slice(0, MaxInstBytesNum);
- DecodeStatus Res = MCDisassembler::Fail;
+ // In case the opcode is not recognized we'll assume a Size of 4 bytes (unless
+ // there are fewer bytes left). This will be overridden on success.
+ Size = std::min((size_t)4, Bytes_.size());
+
do {
// ToDo: better to switch encoding length using some bit predicate
// but it is unknown yet, so try all we can
@@ -469,222 +465,147 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
// encodings
if (isGFX11Plus() && Bytes.size() >= 12 ) {
DecoderUInt128 DecW = eat12Bytes(Bytes);
- Res =
- tryDecodeInst(DecoderTableDPP8GFX1196, DecoderTableDPP8GFX11_FAKE1696,
- MI, DecW, Address, CS);
- if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
- break;
- MI = MCInst(); // clear
- Res =
- tryDecodeInst(DecoderTableDPP8GFX1296, DecoderTableDPP8GFX12_FAKE1696,
- MI, DecW, Address, CS);
- if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
- break;
- MI = MCInst(); // clear
-
- const auto convertVOPDPP = [&]() {
- if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P) {
- convertVOP3PDPPInst(MI);
- } else if (AMDGPU::isVOPC64DPP(MI.getOpcode())) {
- convertVOPCDPPInst(MI); // Special VOP3 case
- } else {
- assert(MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3);
- convertVOP3DPPInst(MI); // Regular VOP3 case
- }
- };
- Res = tryDecodeInst(DecoderTableDPPGFX1196, DecoderTableDPPGFX11_FAKE1696,
- MI, DecW, Address, CS);
- if (Res) {
- convertVOPDPP();
- break;
- }
- Res = tryDecodeInst(DecoderTableDPPGFX1296, DecoderTableDPPGFX12_FAKE1696,
- MI, DecW, Address, CS);
- if (Res) {
- convertVOPDPP();
- break;
- }
- Res = tryDecodeInst(DecoderTableGFX1196, MI, DecW, Address, CS);
- if (Res)
+
+ if (tryDecodeInst(DecoderTableGFX1196, DecoderTableGFX11_FAKE1696, MI,
+ DecW, Address, CS))
break;
- Res = tryDecodeInst(DecoderTableGFX1296, MI, DecW, Address, CS);
- if (Res)
+ if (tryDecodeInst(DecoderTableGFX1296, DecoderTableGFX12_FAKE1696, MI,
+ DecW, Address, CS))
break;
- Res = tryDecodeInst(DecoderTableGFX12W6496, MI, DecW, Address, CS);
- if (Res)
+ if (tryDecodeInst(DecoderTableGFX12W6496, MI, DecW, Address, CS))
break;
}
+
// Reinitialize Bytes
Bytes = Bytes_.slice(0, MaxInstBytesNum);
if (Bytes.size() >= 8) {
const uint64_t QW = eatBytes<uint64_t>(Bytes);
- if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding)) {
- Res = tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address, CS);
- if (Res) {
- if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8)
- == -1)
- break;
- if (convertDPP8Inst(MI) == MCDisassembler::Success)
- break;
- MI = MCInst(); // clear
- }
- }
-
- Res = tryDecodeInst(DecoderTableDPP864, MI, QW, Address, CS);
- if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
- break;
- MI = MCInst(); // clear
-
- Res = tryDecodeInst(DecoderTableDPP8GFX1164,
- DecoderTableDPP8GFX11_FAKE1664, MI, QW, Address, CS);
- if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
- break;
- MI = MCInst(); // clear
-
- Res = tryDecodeInst(DecoderTableDPP8GFX1264,
- DecoderTableDPP8GFX12_FAKE1664, MI, QW, Address, CS);
- if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
+ if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
+ tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address, CS))
break;
- MI = MCInst(); // clear
- Res = tryDecodeInst(DecoderTableDPP64, MI, QW, Address, CS);
- if (Res) break;
-
- Res = tryDecodeInst(DecoderTableDPPGFX1164, DecoderTableDPPGFX11_FAKE1664,
- MI, QW, Address, CS);
- if (Res) {
- if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC)
- convertVOPCDPPInst(MI);
- break;
- }
-
- Res = tryDecodeInst(DecoderTableDPPGFX1264, DecoderTableDPPGFX12_FAKE1664,
- MI, QW, Address, CS);
- if (Res) {
- if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC)
- convertVOPCDPPInst(MI);
+ if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) &&
+ tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address, CS))
break;
- }
-
- if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem)) {
- Res = tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address, CS);
- if (Res)
- break;
- }
// Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
// v_mad_mixhi_f16 for FMA variants. Try to decode using this special
// table first so we print the correct name.
- if (STI.hasFeature(AMDGPU::FeatureFmaMixInsts)) {
- Res = tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS);
- if (Res)
- break;
- }
+ if (STI.hasFeature(AMDGPU::FeatureFmaMixInsts) &&
+ tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS))
+ break;
- if (STI.hasFeature(AMDGPU::FeatureGFX940Insts)) {
- Res = tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS);
- if (Res)
- break;
- }
+ if (STI.hasFeature(AMDGPU::FeatureGFX940Insts) &&
+ tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS))
+ break;
- if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts)) {
- Res = tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS);
- if (Res)
- break;
- }
+ if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
+ tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS))
+ break;
- Res = tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS);
- if (Res)
+ if (tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS))
break;
- Res = tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS);
- if (Res)
+ if (tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS))
break;
- Res = tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS);
- if (Res)
+ if (tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS))
break;
- Res = tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI,
- QW, Address, CS);
- if (Res)
+ if (tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
+ Address, CS))
break;
- Res = tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI,
- QW, Address, CS);
- if (Res)
+ if (tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
+ Address, CS))
break;
- Res = tryDecodeInst(DecoderTableGFX11W6464, MI, QW, Address, CS);
- if (Res)
+ if (tryDecodeInst(DecoderTableGFX11W6464, MI, QW, Address, CS))
break;
- Res = tryDecodeInst(DecoderTableGFX12W6464, MI, QW, Address, CS);
- if (Res)
+ if (tryDecodeInst(DecoderTableGFX12W6464, MI, QW, Address, CS))
break;
}
- // Reinitialize Bytes as DPP64 could have eaten too much
+ // Reinitialize Bytes
Bytes = Bytes_.slice(0, MaxInstBytesNum);
// Try decode 32-bit instruction
- if (Bytes.size() < 4) break;
- const uint32_t DW = eatBytes<uint32_t>(Bytes);
- Res = tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS);
- if (Res) break;
+ if (Bytes.size() >= 4) {
+ const uint32_t DW = eatBytes<uint32_t>(Bytes);
- Res = tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address, CS);
- if (Res) break;
+ if (tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS))
+ break;
- Res = tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS);
- if (Res) break;
+ if (tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address, CS))
+ break;
- if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts)) {
- Res = tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address, CS);
- if (Res)
+ if (tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS))
break;
- }
- if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding)) {
- Res = tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address, CS);
- if (Res) break;
- }
+ if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
+ tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address, CS))
+ break;
+
+ if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
+ tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address, CS))
+ break;
- Res = tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS);
- if (Res) break;
+ if (tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS))
+ break;
- Res = tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
- Address, CS);
- if (Res) break;
+ if (tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
+ Address, CS))
+ break;
- Res = tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW,
- Address, CS);
+ if (tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW,
+ Address, CS))
+ break;
+ }
+
+ return MCDisassembler::Fail;
} while (false);
- if (Res && AMDGPU::isMAC(MI.getOpcode())) {
+ if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DPP) {
+ if (isMacDPP(MI))
+ convertMacDPPInst(MI);
+
+ if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
+ convertVOP3PDPPInst(MI);
+ else if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC) ||
+ AMDGPU::isVOPC64DPP(MI.getOpcode()))
+ convertVOPCDPPInst(MI); // Special VOP3 case
+ else if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8) !=
+ -1)
+ convertDPP8Inst(MI);
+ else if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3)
+ convertVOP3DPPInst(MI); // Regular VOP3 case
+ }
+
+ if (AMDGPU::isMAC(MI.getOpcode())) {
// Insert dummy unused src2_modifiers.
insertNamedMCOperand(MI, MCOperand::createImm(0),
AMDGPU::OpName::src2_modifiers);
}
- if (Res && (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
- MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp)) {
+ if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
+ MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {
// Insert dummy unused src2_modifiers.
insertNamedMCOperand(MI, MCOperand::createImm(0),
AMDGPU::OpName::src2_modifiers);
}
- if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DS) &&
+ if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DS) &&
!AMDGPU::hasGDS(STI)) {
insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::gds);
}
- if (Res && (MCII->get(MI.getOpcode()).TSFlags &
- (SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD))) {
+ if (MCII->get(MI.getOpcode()).TSFlags &
+ (SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD)) {
int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
AMDGPU::OpName::cpol);
if (CPolPos != -1) {
@@ -700,9 +621,9 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
}
- if (Res && (MCII->get(MI.getOpcode()).TSFlags &
- (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) &&
- (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {
+ if ((MCII->get(MI.getOpcode()).TSFlags &
+ (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) &&
+ (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {
// GFX90A lost TFE, its place is occupied by ACC.
int TFEOpIdx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
@@ -713,8 +634,8 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
}
- if (Res && (MCII->get(MI.getOpcode()).TSFlags &
- (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF))) {
+ if (MCII->get(MI.getOpcode()).TSFlags &
+ (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) {
int SWZOpIdx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz);
if (SWZOpIdx != -1) {
@@ -724,7 +645,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
}
- if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG)) {
+ if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG) {
int VAddr0Idx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
int RsrcIdx =
@@ -732,36 +653,32 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
if (VAddr0Idx >= 0 && NSAArgs > 0) {
unsigned NSAWords = (NSAArgs + 3) / 4;
- if (Bytes.size() < 4 * NSAWords) {
- Res = MCDisassembler::Fail;
- } else {
- for (unsigned i = 0; i < NSAArgs; ++i) {
- const unsigned VAddrIdx = VAddr0Idx + 1 + i;
- auto VAddrRCID =
- MCII->get(MI.getOpcode()).operands()[VAddrIdx].RegClass;
- MI.insert(MI.begin() + VAddrIdx,
- createRegOperand(VAddrRCID, Bytes[i]));
- }
- Bytes = Bytes.slice(4 * NSAWords);
+ if (Bytes.size() < 4 * NSAWords)
+ return MCDisassembler::Fail;
+ for (unsigned i = 0; i < NSAArgs; ++i) {
+ const unsigned VAddrIdx = VAddr0Idx + 1 + i;
+ auto VAddrRCID =
+ MCII->get(MI.getOpcode()).operands()[VAddrIdx].RegClass;
+ MI.insert(MI.begin() + VAddrIdx, createRegOperand(VAddrRCID, Bytes[i]));
}
+ Bytes = Bytes.slice(4 * NSAWords);
}
- if (Res)
- Res = convertMIMGInst(MI);
+ convertMIMGInst(MI);
}
- if (Res && (MCII->get(MI.getOpcode()).TSFlags &
- (SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE)))
- Res = convertMIMGInst(MI);
+ if (MCII->get(MI.getOpcode()).TSFlags &
+ (SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE))
+ convertMIMGInst(MI);
- if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP))
- Res = convertEXPInst(MI);
+ if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP)
+ convertEXPInst(MI);
- if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP))
- Res = convertVINTERPInst(MI);
+ if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP)
+ convertVINTERPInst(MI);
- if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SDWA))
- Res = convertSDWAInst(MI);
+ if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SDWA)
+ convertSDWAInst(MI);
int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
AMDGPU::OpName::vdst_in);
@@ -782,27 +699,23 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
int ImmLitIdx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::imm);
bool IsSOPK = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SOPK;
- if (Res && ImmLitIdx != -1 && !IsSOPK)
- Res = convertFMAanyK(MI, ImmLitIdx);
+ if (ImmLitIdx != -1 && !IsSOPK)
+ convertFMAanyK(MI, ImmLitIdx);
- // if the opcode was not recognized we'll assume a Size of 4 bytes
- // (unless there are fewer bytes left)
- Size = Res ? (MaxInstBytesNum - Bytes.size())
- : std::min((size_t)4, Bytes_.size());
- return Res;
+ Size = MaxInstBytesNum - Bytes.size();
+ return MCDisassembler::Success;
}
-DecodeStatus AMDGPUDisassembler::convertEXPInst(MCInst &MI) const {
+void AMDGPUDisassembler::convertEXPInst(MCInst &MI) const {
if (STI.hasFeature(AMDGPU::FeatureGFX11Insts)) {
// The MCInst still has these fields even though they are no longer encoded
// in the GFX11 instruction.
insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm);
insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::compr);
}
- return MCDisassembler::Success;
}
-DecodeStatus AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const {
+void AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const {
if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx11 ||
MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx12 ||
MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx11 ||
@@ -815,10 +728,9 @@ DecodeStatus AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const {
// instruction.
insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel);
}
- return MCDisassembler::Success;
}
-DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
+void AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
STI.hasFeature(AMDGPU::FeatureGFX10)) {
if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::sdst))
@@ -835,7 +747,6 @@ DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
}
}
- return MCDisassembler::Success;
}
struct VOPModifiers {
@@ -939,56 +850,40 @@ void AMDGPUDisassembler::convertMacDPPInst(MCInst &MI) const {
AMDGPU::OpName::src2_modifiers);
}
-// We must check FI == literal to reject not genuine dpp8 insts, and we must
-// first add optional MI operands to check FI
-DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
+void AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
unsigned Opc = MI.getOpcode();
- if (MCII->get(Opc).TSFlags & SIInstrFlags::VOP3P) {
- convertVOP3PDPPInst(MI);
- } else if ((MCII->get(Opc).TSFlags & SIInstrFlags::VOPC) ||
- AMDGPU::isVOPC64DPP(Opc)) {
- convertVOPCDPPInst(MI);
- } else {
- if (isMacDPP(MI))
- convertMacDPPInst(MI);
+ int VDstInIdx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
+ if (VDstInIdx != -1)
+ insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);
- int VDstInIdx =
- AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
- if (VDstInIdx != -1)
- insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);
+ if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp8_gfx12 ||
+ MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp8_gfx12)
+ insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::src2);
- if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp8_gfx12 ||
- MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp8_gfx12)
- insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::src2);
+ unsigned DescNumOps = MCII->get(Opc).getNumOperands();
+ if (MI.getNumOperands() < DescNumOps &&
+ AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
+ convertTrue16OpSel(MI);
+ auto Mods = collectVOPModifiers(MI);
+ insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
+ AMDGPU::OpName::op_sel);
+ } else {
+ // Insert dummy unused src modifiers.
+ if (MI.getNumOperands() < DescNumOps &&
+ AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
+ insertNamedMCOperand(MI, MCOperand::createImm(0),
+ AMDGPU::OpName::src0_modifiers);
- unsigned DescNumOps = MCII->get(Opc).getNumOperands();
if (MI.getNumOperands() < DescNumOps &&
- AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
- convertTrue16OpSel(MI);
- auto Mods = collectVOPModifiers(MI);
- insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
- AMDGPU::OpName::op_sel);
- } else {
- // Insert dummy unused src modifiers.
- if (MI.getNumOperands() < DescNumOps &&
- AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
- insertNamedMCOperand(MI, MCOperand::createImm(0),
- AMDGPU::OpName::src0_modifiers);
-
- if (MI.getNumOperands() < DescNumOps &&
- AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
- insertNamedMCOperand(MI, MCOperand::createImm(0),
- AMDGPU::OpName::src1_modifiers);
- }
+ AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
+ insertNamedMCOperand(MI, MCOperand::createImm(0),
+ AMDGPU::OpName::src1_modifiers);
}
- return isValidDPP8(MI) ? MCDisassembler::Success : MCDisassembler::SoftFail;
}
-DecodeStatus AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
- if (isMacDPP(MI))
- convertMacDPPInst(MI);
-
+void AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
convertTrue16OpSel(MI);
int VDstInIdx =
@@ -1008,13 +903,12 @@ DecodeStatus AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
AMDGPU::OpName::op_sel);
}
- return MCDisassembler::Success;
}
// Note that before gfx10, the MIMG encoding provided no information about
// VADDR size. Consequently, decoded instructions always show address as if it
// has 1 dword, which could be not really so.
-DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
+void AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
auto TSFlags = MCII->get(MI.getOpcode()).TSFlags;
int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
@@ -1043,7 +937,7 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
if (BaseOpcode->BVH) {
// Add A16 operand for intersect_ray instructions
addOperand(MI, MCOperand::createImm(BaseOpcode->A16));
- return MCDisassembler::Success;
+ return;
}
bool IsAtomic = (VDstIdx != -1);
@@ -1078,7 +972,7 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
if (!STI.hasFeature(AMDGPU::FeaturePartialNSAEncoding)) {
// The NSA encoding does not contain enough operands for the
// combination of base opcode / dimension. Should this be an error?
- return MCDisassembler::Success;
+ return;
}
IsPartialNSA = true;
}
@@ -1097,12 +991,12 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
DstSize += 1;
if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
- return MCDisassembler::Success;
+ return;
int NewOpcode =
AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding, DstSize, AddrSize);
if (NewOpcode == -1)
- return MCDisassembler::Success;
+ return;
// Widen the register to the correct number of enabled channels.
unsigned NewVdata = AMDGPU::NoRegister;
@@ -1119,7 +1013,7 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
if (NewVdata == AMDGPU::NoRegister) {
// It's possible to encode this such that the low register + enabled
// components exceeds the register count.
- return MCDisassembler::Success;
+ return;
}
}
@@ -1137,7 +1031,7 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0,
&MRI.getRegClass(AddrRCID));
if (!NewVAddrSA)
- return MCDisassembler::Success;
+ return;
}
MI.setOpcode(NewOpcode);
@@ -1158,14 +1052,12 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
MI.erase(MI.begin() + VAddr0Idx + AddrSize,
MI.begin() + VAddr0Idx + Info->VAddrDwords);
}
-
- return MCDisassembler::Success;
}
// Opsel and neg bits are used in src_modifiers and standalone operands. Autogen
// decoder only adds to src_modifiers, so manually add the bits to the other
// operands.
-DecodeStatus AMDGPUDisassembler::convertVOP3PDPPInst(MCInst &MI) const {
+void AMDGPUDisassembler::convertVOP3PDPPInst(MCInst &MI) const {
unsigned Opc = MI.getOpcode();
unsigned DescNumOps = MCII->get(Opc).getNumOperands();
auto Mods = collectVOPModifiers(MI, true);
@@ -1190,12 +1082,10 @@ DecodeStatus AMDGPUDisassembler::convertVOP3PDPPInst(MCInst &MI) const {
AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_hi))
insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegHi),
AMDGPU::OpName::neg_hi);
-
- return MCDisassembler::Success;
}
// Create dummy old operand and insert optional operands
-DecodeStatus AMDGPUDisassembler::convertVOPCDPPInst(MCInst &MI) const {
+void AMDGPUDisassembler::convertVOPCDPPInst(MCInst &MI) const {
unsigned Opc = MI.getOpcode();
unsigned DescNumOps = MCII->get(Opc).getNumOperands();
@@ -1212,11 +1102,9 @@ DecodeStatus AMDGPUDisassembler::convertVOPCDPPInst(MCInst &MI) const {
AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
insertNamedMCOperand(MI, MCOperand::createImm(0),
AMDGPU::OpName::src1_modifiers);
- return MCDisassembler::Success;
}
-DecodeStatus AMDGPUDisassembler::convertFMAanyK(MCInst &MI,
- int ImmLitIdx) const {
+void AMDGPUDisassembler::convertFMAanyK(MCInst &MI, int ImmLitIdx) const {
assert(HasLiteral && "Should have decoded a literal");
const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
unsigned DescNumOps = Desc.getNumOperands();
@@ -1232,7 +1120,6 @@ DecodeStatus AMDGPUDisassembler::convertFMAanyK(MCInst &MI,
IsDeferredOp)
Op.setImm(Literal);
}
- return MCDisassembler::Success;
}
const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
@@ -1831,6 +1718,12 @@ MCOperand AMDGPUDisassembler::decodeSplitBarrier(unsigned Val) const {
return decodeSrcOp(OPW32, Val);
}
+MCOperand AMDGPUDisassembler::decodeDpp8FI(unsigned Val) const {
+ if (Val != AMDGPU::DPP::DPP8_FI_0 && Val != AMDGPU::DPP::DPP8_FI_1)
+ return MCOperand();
+ return MCOperand::createImm(Val);
+}
+
bool AMDGPUDisassembler::isVI() const {
return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
}
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index 3142b8a..2e1b6fb 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -194,15 +194,15 @@ public:
DecodeStatus decodeCOMPUTE_PGM_RSRC3(uint32_t FourByteBuffer,
raw_string_ostream &KdStream) const;
- DecodeStatus convertEXPInst(MCInst &MI) const;
- DecodeStatus convertVINTERPInst(MCInst &MI) const;
- DecodeStatus convertFMAanyK(MCInst &MI, int ImmLitIdx) const;
- DecodeStatus convertSDWAInst(MCInst &MI) const;
- DecodeStatus convertDPP8Inst(MCInst &MI) const;
- DecodeStatus convertMIMGInst(MCInst &MI) const;
- DecodeStatus convertVOP3DPPInst(MCInst &MI) const;
- DecodeStatus convertVOP3PDPPInst(MCInst &MI) const;
- DecodeStatus convertVOPCDPPInst(MCInst &MI) const;
+ void convertEXPInst(MCInst &MI) const;
+ void convertVINTERPInst(MCInst &MI) const;
+ void convertFMAanyK(MCInst &MI, int ImmLitIdx) const;
+ void convertSDWAInst(MCInst &MI) const;
+ void convertDPP8Inst(MCInst &MI) const;
+ void convertMIMGInst(MCInst &MI) const;
+ void convertVOP3DPPInst(MCInst &MI) const;
+ void convertVOP3PDPPInst(MCInst &MI) const;
+ void convertVOPCDPPInst(MCInst &MI) const;
void convertMacDPPInst(MCInst &MI) const;
void convertTrue16OpSel(MCInst &MI) const;
@@ -261,6 +261,7 @@ public:
MCOperand decodeBoolReg(unsigned Val) const;
MCOperand decodeSplitBarrier(unsigned Val) const;
+ MCOperand decodeDpp8FI(unsigned Val) const;
int getTTmpIdx(unsigned Val) const;
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index a727134..00fa93c 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -177,7 +177,7 @@ static bool isLdsDma(const MachineInstr &MI) {
static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
const MachineOperand *RegOp = TII->getNamedOperand(RegInstr,
AMDGPU::OpName::simm16);
- return RegOp->getImm() & AMDGPU::Hwreg::ID_MASK_;
+ return std::get<0>(AMDGPU::Hwreg::HwregEncoding::decode(RegOp->getImm()));
}
ScheduleHazardRecognizer::HazardType
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index a45fea6..a32be1e 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -1778,13 +1778,9 @@ void AMDGPUInstPrinter::printSDelayALU(const MCInst *MI, unsigned OpNo,
void AMDGPUInstPrinter::printHwreg(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
- unsigned Id;
- unsigned Offset;
- unsigned Width;
-
using namespace llvm::AMDGPU::Hwreg;
unsigned Val = MI->getOperand(OpNo).getImm();
- decodeHwreg(Val, Id, Offset, Width);
+ auto [Id, Offset, Width] = HwregEncoding::decode(Val);
StringRef HwRegName = getHwreg(Id, STI);
O << "hwreg(";
@@ -1793,9 +1789,8 @@ void AMDGPUInstPrinter::printHwreg(const MCInst *MI, unsigned OpNo,
} else {
O << Id;
}
- if (Width != WIDTH_DEFAULT_ || Offset != OFFSET_DEFAULT_) {
+ if (Width != HwregSize::Default || Offset != HwregOffset::Default)
O << ", " << Offset << ", " << Width;
- }
O << ')';
}
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index 98310c3..0b516bf 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -549,33 +549,12 @@ enum Id { // HwRegCode, (6) [5:0]
ID_SQ_PERF_SNAPSHOT_DATA1 = 22,
ID_SQ_PERF_SNAPSHOT_PC_LO = 23,
ID_SQ_PERF_SNAPSHOT_PC_HI = 24,
-
- ID_SHIFT_ = 0,
- ID_WIDTH_ = 6,
- ID_MASK_ = (((1 << ID_WIDTH_) - 1) << ID_SHIFT_)
};
enum Offset : unsigned { // Offset, (5) [10:6]
- OFFSET_DEFAULT_ = 0,
- OFFSET_SHIFT_ = 6,
- OFFSET_WIDTH_ = 5,
- OFFSET_MASK_ = (((1 << OFFSET_WIDTH_) - 1) << OFFSET_SHIFT_),
-
OFFSET_MEM_VIOL = 8,
};
-enum WidthMinusOne : unsigned { // WidthMinusOne, (5) [15:11]
- WIDTH_M1_DEFAULT_ = 31,
- WIDTH_M1_SHIFT_ = 11,
- WIDTH_M1_WIDTH_ = 5,
- WIDTH_M1_MASK_ = (((1 << WIDTH_M1_WIDTH_) - 1) << WIDTH_M1_SHIFT_),
-};
-
-// Some values from WidthMinusOne mapped into Width domain.
-enum Width : unsigned {
- WIDTH_DEFAULT_ = WIDTH_M1_DEFAULT_ + 1,
-};
-
enum ModeRegisterMasks : uint32_t {
FP_ROUND_MASK = 0xf << 0, // Bits 0..3
FP_DENORM_MASK = 0xf << 4, // Bits 4..7
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index d02aee7..4f106bf 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -478,14 +478,13 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit(
.addImm(0);
Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
- addReg(FlatScrInitLo).
- addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_LO |
- (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
- addReg(FlatScrInitHi).
- addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_HI |
- (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
+ using namespace AMDGPU::Hwreg;
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32))
+ .addReg(FlatScrInitLo)
+ .addImm(int16_t(HwregEncoding::encode(ID_FLAT_SCR_LO, 0, 32)));
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32))
+ .addReg(FlatScrInitHi)
+ .addImm(int16_t(HwregEncoding::encode(ID_FLAT_SCR_HI, 0, 32)));
return;
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 257dff6..d8f528d8 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -3960,7 +3960,7 @@ SDValue SITargetLowering::lowerGET_ROUNDING(SDValue Op,
assert(Op.getValueType() == MVT::i32);
uint32_t BothRoundHwReg =
- AMDGPU::Hwreg::encodeHwreg(AMDGPU::Hwreg::ID_MODE, 0, 4);
+ AMDGPU::Hwreg::HwregEncoding::encode(AMDGPU::Hwreg::ID_MODE, 0, 4);
SDValue GetRoundBothImm = DAG.getTargetConstant(BothRoundHwReg, SL, MVT::i32);
SDValue IntrinID =
@@ -4195,8 +4195,8 @@ SITargetLowering::emitGWSMemViolTestLoop(MachineInstr &MI,
MachineBasicBlock::iterator I = LoopBB->end();
- const unsigned EncodedReg = AMDGPU::Hwreg::encodeHwreg(
- AMDGPU::Hwreg::ID_TRAPSTS, AMDGPU::Hwreg::OFFSET_MEM_VIOL, 1);
+ const unsigned EncodedReg = AMDGPU::Hwreg::HwregEncoding::encode(
+ AMDGPU::Hwreg::ID_TRAPSTS, AMDGPU::Hwreg::OFFSET_MEM_VIOL, 1);
// Clear TRAP_STS.MEM_VIOL
BuildMI(*LoopBB, LoopBB->begin(), DL, TII->get(AMDGPU::S_SETREG_IMM32_B32))
@@ -4999,18 +4999,16 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
// Otherwise there was overflow and the result is hi2:0. In both cases the
// result should represent the actual time at some point during the sequence
// of three getregs.
+ using namespace AMDGPU::Hwreg;
Register RegHi1 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_GETREG_B32), RegHi1)
- .addImm(AMDGPU::Hwreg::encodeHwreg(AMDGPU::Hwreg::ID_SHADER_CYCLES_HI,
- 0, 32));
+ .addImm(HwregEncoding::encode(ID_SHADER_CYCLES_HI, 0, 32));
Register RegLo1 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_GETREG_B32), RegLo1)
- .addImm(
- AMDGPU::Hwreg::encodeHwreg(AMDGPU::Hwreg::ID_SHADER_CYCLES, 0, 32));
+ .addImm(HwregEncoding::encode(ID_SHADER_CYCLES, 0, 32));
Register RegHi2 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_GETREG_B32), RegHi2)
- .addImm(AMDGPU::Hwreg::encodeHwreg(AMDGPU::Hwreg::ID_SHADER_CYCLES_HI,
- 0, 32));
+ .addImm(HwregEncoding::encode(ID_SHADER_CYCLES_HI, 0, 32));
BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_CMP_EQ_U32))
.addReg(RegHi1)
.addReg(RegHi2);
@@ -5207,8 +5205,8 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
// FIXME: This could be predicates on the immediate, but tablegen doesn't
// allow you to have a no side effect instruction in the output of a
// sideeffecting pattern.
- unsigned ID, Offset, Width;
- AMDGPU::Hwreg::decodeHwreg(MI.getOperand(1).getImm(), ID, Offset, Width);
+ auto [ID, Offset, Width] =
+ AMDGPU::Hwreg::HwregEncoding::decode(MI.getOperand(1).getImm());
if (ID != AMDGPU::Hwreg::ID_MODE)
return BB;
@@ -10495,9 +10493,8 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
SDValue NegDivScale0 = DAG.getNode(ISD::FNEG, SL, MVT::f32,
DenominatorScaled, Flags);
- const unsigned Denorm32Reg = AMDGPU::Hwreg::ID_MODE |
- (4 << AMDGPU::Hwreg::OFFSET_SHIFT_) |
- (1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_);
+ using namespace AMDGPU::Hwreg;
+ const unsigned Denorm32Reg = HwregEncoding::encode(ID_MODE, 4, 2);
const SDValue BitField = DAG.getTargetConstant(Denorm32Reg, SL, MVT::i32);
const MachineFunction &MF = DAG.getMachineFunction();
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 6ecb1c8..a6184c5 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -480,6 +480,10 @@ public:
// WaitEventType to corresponding counter values in InstCounterType.
virtual const unsigned *getWaitEventMask() const = 0;
+ // Returns a new waitcnt with all counters except VScnt set to 0. If
+ // IncludeVSCnt is true, VScnt is set to 0, otherwise it is set to ~0u.
+ virtual AMDGPU::Waitcnt getAllZeroWaitcnt(bool IncludeVSCnt) const = 0;
+
virtual ~WaitcntGenerator() = default;
};
@@ -516,6 +520,8 @@ public:
return WaitEventMaskForInstPreGFX12;
}
+
+ virtual AMDGPU::Waitcnt getAllZeroWaitcnt(bool IncludeVSCnt) const override;
};
class WaitcntGeneratorGFX12Plus : public WaitcntGenerator {
@@ -549,6 +555,8 @@ public:
return WaitEventMaskForInstGFX12Plus;
}
+
+ virtual AMDGPU::Waitcnt getAllZeroWaitcnt(bool IncludeVSCnt) const override;
};
class SIInsertWaitcnts : public MachineFunctionPass {
@@ -1304,6 +1312,16 @@ bool WaitcntGeneratorPreGFX12::createNewWaitcnt(
return Modified;
}
+AMDGPU::Waitcnt
+WaitcntGeneratorPreGFX12::getAllZeroWaitcnt(bool IncludeVSCnt) const {
+ return AMDGPU::Waitcnt(0, 0, 0, IncludeVSCnt && ST->hasVscnt() ? 0 : ~0u);
+}
+
+AMDGPU::Waitcnt
+WaitcntGeneratorGFX12Plus::getAllZeroWaitcnt(bool IncludeVSCnt) const {
+ return AMDGPU::Waitcnt(0, 0, 0, IncludeVSCnt ? 0 : ~0u, 0, 0, 0);
+}
+
/// Combine consecutive S_WAIT_*CNT instructions that precede \p It and
/// follow \p OldWaitcntInstr and apply any extra waits from \p Wait that
/// were added by previous passes. Currently this pass conservatively
@@ -1613,8 +1631,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
MI.getOpcode() == AMDGPU::SI_RETURN ||
MI.getOpcode() == AMDGPU::S_SETPC_B64_return ||
(MI.isReturn() && MI.isCall() && !callWaitsOnFunctionEntry(MI))) {
- Wait = Wait.combined(
- AMDGPU::Waitcnt::allZeroExceptVsCnt(ST->hasExtendedWaitCounts()));
+ Wait = Wait.combined(WCG->getAllZeroWaitcnt(/*IncludeVSCnt=*/false));
}
// Identify S_ENDPGM instructions which may have to wait for outstanding VMEM
// stores. In this case it can be useful to send a message to explicitly
@@ -1834,8 +1851,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
// cause an exception. Otherwise, insert an explicit S_WAITCNT 0 here.
if (MI.getOpcode() == AMDGPU::S_BARRIER &&
!ST->hasAutoWaitcntBeforeBarrier() && !ST->supportsBackOffBarrier()) {
- Wait = Wait.combined(
- AMDGPU::Waitcnt::allZero(ST->hasExtendedWaitCounts(), ST->hasVscnt()));
+ Wait = Wait.combined(WCG->getAllZeroWaitcnt(/*IncludeVSCnt=*/true));
}
// TODO: Remove this work-around, enable the assert for Bug 457939
@@ -1851,7 +1867,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
ScoreBrackets.simplifyWaitcnt(Wait);
if (ForceEmitZeroWaitcnts)
- Wait = AMDGPU::Waitcnt::allZeroExceptVsCnt(ST->hasExtendedWaitCounts());
+ Wait = WCG->getAllZeroWaitcnt(/*IncludeVSCnt=*/false);
if (ForceEmitWaitcnt[LOAD_CNT])
Wait.LoadCnt = 0;
@@ -2089,7 +2105,7 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
if (callWaitsOnFunctionReturn(Inst)) {
// Act as a wait on everything
ScoreBrackets->applyWaitcnt(
- AMDGPU::Waitcnt::allZeroExceptVsCnt(ST->hasExtendedWaitCounts()));
+ WCG->getAllZeroWaitcnt(/*IncludeVSCnt=*/false));
ScoreBrackets->setStateOnFunctionEntryOrReturn();
} else {
// May need to way wait for anything.
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 97c7237..34cdb09 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -987,8 +987,8 @@ def SDWAVopcDst : BoolRC {
}
class NamedIntOperand<ValueType Type, string Prefix, bit Optional = 1,
- string ConvertMethod = "nullptr">
- : CustomOperand<Type, Optional, NAME> {
+ string name = NAME, string ConvertMethod = "nullptr">
+ : CustomOperand<Type, Optional, name> {
let ParserMethod =
"[this](OperandVector &Operands) -> ParseStatus { "#
"return parseIntWithPrefix(\""#Prefix#"\", Operands, "#
@@ -1090,9 +1090,12 @@ let DefaultValue = "0xf" in {
def DppRowMask : NamedIntOperand<i32, "row_mask">;
def DppBankMask : NamedIntOperand<i32, "bank_mask">;
}
-def DppBoundCtrl : NamedIntOperand<i1, "bound_ctrl", 1,
+def DppBoundCtrl : NamedIntOperand<i1, "bound_ctrl", 1, "DppBoundCtrl",
"[this] (int64_t &BC) -> bool { return convertDppBoundCtrl(BC); }">;
-def DppFI : NamedIntOperand<i32, "fi">;
+
+let DecoderMethod = "decodeDpp8FI" in
+def Dpp8FI : NamedIntOperand<i32, "fi", 1, "DppFI">;
+def Dpp16FI : NamedIntOperand<i32, "fi", 1, "DppFI">;
def blgp : CustomOperand<i32, 1, "BLGP">;
def CBSZ : NamedIntOperand<i32, "cbsz">;
@@ -1823,7 +1826,7 @@ class getInsDPP16 <RegisterOperand OldRC, RegisterOperand Src0RC, RegisterOperan
Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> {
dag ret = !con(getInsDPP<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs,
HasModifiers, Src0Mod, Src1Mod, Src2Mod, HasOld>.ret,
- (ins DppFI:$fi));
+ (ins Dpp16FI:$fi));
}
class getInsDPP8 <RegisterOperand OldRC, RegisterOperand Src0RC, RegisterOperand Src1RC,
@@ -1831,7 +1834,7 @@ class getInsDPP8 <RegisterOperand OldRC, RegisterOperand Src0RC, RegisterOperand
Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> {
dag ret = !con(getInsDPPBase<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs,
HasModifiers, Src0Mod, Src1Mod, Src2Mod, HasOld>.ret,
- (ins dpp8:$dpp8, DppFI:$fi));
+ (ins dpp8:$dpp8, Dpp8FI:$fi));
}
class getInsVOP3DPPBase<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld> {
@@ -1851,12 +1854,12 @@ class getInsVOP3DPP<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit Has
class getInsVOP3DPP16<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld = 1> {
dag ret = !con(getInsVOP3DPP<VOP3Base,OldRC,NumSrcArgs,HasOld>.ret,
- (ins DppFI:$fi));
+ (ins Dpp16FI:$fi));
}
class getInsVOP3DPP8<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld = 1> {
dag ret = !con(getInsVOP3DPPBase<VOP3Base,OldRC,NumSrcArgs,HasOld>.ret,
- (ins dpp8:$dpp8, DppFI:$fi));
+ (ins dpp8:$dpp8, Dpp8FI:$fi));
}
// Ins for SDWA
diff --git a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
index e62ad02..c01b126 100644
--- a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
+++ b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
@@ -225,11 +225,10 @@ void SIModeRegister::insertSetreg(MachineBasicBlock &MBB, MachineInstr *MI,
unsigned Offset = llvm::countr_zero<unsigned>(InstrMode.Mask);
unsigned Width = llvm::countr_one<unsigned>(InstrMode.Mask >> Offset);
unsigned Value = (InstrMode.Mode >> Offset) & ((1 << Width) - 1);
+ using namespace AMDGPU::Hwreg;
BuildMI(MBB, MI, nullptr, TII->get(AMDGPU::S_SETREG_IMM32_B32))
.addImm(Value)
- .addImm(((Width - 1) << AMDGPU::Hwreg::WIDTH_M1_SHIFT_) |
- (Offset << AMDGPU::Hwreg::OFFSET_SHIFT_) |
- (AMDGPU::Hwreg::ID_MODE << AMDGPU::Hwreg::ID_SHIFT_));
+ .addImm(HwregEncoding::encode(ID_MODE, Offset, Width));
++NumSetregInserted;
Changed = true;
InstrMode.Mask &= ~(((1 << Width) - 1) << Offset);
@@ -276,15 +275,11 @@ void SIModeRegister::processBlockPhase1(MachineBasicBlock &MBB,
// as we assume it has been inserted by a higher authority (this is
// likely to be a very rare occurrence).
unsigned Dst = TII->getNamedOperand(MI, AMDGPU::OpName::simm16)->getImm();
- if (((Dst & AMDGPU::Hwreg::ID_MASK_) >> AMDGPU::Hwreg::ID_SHIFT_) !=
- AMDGPU::Hwreg::ID_MODE)
+ using namespace AMDGPU::Hwreg;
+ auto [Id, Offset, Width] = HwregEncoding::decode(Dst);
+ if (Id != ID_MODE)
continue;
- unsigned Width = ((Dst & AMDGPU::Hwreg::WIDTH_M1_MASK_) >>
- AMDGPU::Hwreg::WIDTH_M1_SHIFT_) +
- 1;
- unsigned Offset =
- (Dst & AMDGPU::Hwreg::OFFSET_MASK_) >> AMDGPU::Hwreg::OFFSET_SHIFT_;
unsigned Mask = maskTrailingOnes<unsigned>(Width) << Offset;
// If an InsertionPoint is set we will insert a setreg there.
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index dacdf7b..ce91e05 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1698,22 +1698,14 @@ int64_t getHwregId(const StringRef Name, const MCSubtargetInfo &STI) {
return (Idx < 0) ? Idx : Opr[Idx].Encoding;
}
-bool isValidHwreg(int64_t Id) {
- return 0 <= Id && isUInt<ID_WIDTH_>(Id);
-}
+bool isValidHwreg(int64_t Id) { return 0 <= Id && isUInt<HwregId::Width>(Id); }
bool isValidHwregOffset(int64_t Offset) {
- return 0 <= Offset && isUInt<OFFSET_WIDTH_>(Offset);
+ return 0 <= Offset && isUInt<HwregOffset::Width>(Offset);
}
bool isValidHwregWidth(int64_t Width) {
- return 0 <= (Width - 1) && isUInt<WIDTH_M1_WIDTH_>(Width - 1);
-}
-
-uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width) {
- return (Id << ID_SHIFT_) |
- (Offset << OFFSET_SHIFT_) |
- ((Width - 1) << WIDTH_M1_SHIFT_);
+ return 0 <= (Width - 1) && isUInt<HwregSize::Width>(Width - 1);
}
StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI) {
@@ -1721,12 +1713,6 @@ StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI) {
return (Idx < 0) ? "" : Opr[Idx].Name;
}
-void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width) {
- Id = (Val & ID_MASK_) >> ID_SHIFT_;
- Offset = (Val & OFFSET_MASK_) >> OFFSET_SHIFT_;
- Width = ((Val & WIDTH_M1_MASK_) >> WIDTH_M1_SHIFT_) + 1;
-}
-
} // namespace Hwreg
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index f35e7744..6826cd2 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -322,6 +322,35 @@ getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs,
} // end namespace IsaInfo
+// Represents a field in an encoded value.
+template <unsigned HighBit, unsigned LowBit, unsigned D = 0>
+struct EncodingField {
+ static_assert(HighBit >= LowBit, "Invalid bit range!");
+ static constexpr unsigned Offset = LowBit;
+ static constexpr unsigned Width = HighBit - LowBit + 1;
+
+ using ValueType = unsigned;
+ static constexpr ValueType Default = D;
+
+ ValueType Value;
+ constexpr EncodingField(ValueType Value) : Value(Value) {}
+
+ constexpr uint64_t encode() const { return Value; }
+ static ValueType decode(uint64_t Encoded) { return Encoded; }
+};
+
+// A helper for encoding and decoding multiple fields.
+template <typename... Fields> struct EncodingFields {
+ static constexpr uint64_t encode(Fields... Values) {
+ return ((Values.encode() << Values.Offset) | ...);
+ }
+
+ static std::tuple<typename Fields::ValueType...> decode(uint64_t Encoded) {
+ return {Fields::decode((Encoded >> Fields::Offset) &
+ maxUIntN(Fields::Width))...};
+ }
+};
+
LLVM_READONLY
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
@@ -870,15 +899,6 @@ struct Waitcnt {
: LoadCnt(LoadCnt), ExpCnt(ExpCnt), DsCnt(DsCnt), StoreCnt(StoreCnt),
SampleCnt(SampleCnt), BvhCnt(BvhCnt), KmCnt(KmCnt) {}
- static Waitcnt allZero(bool Extended, bool HasStorecnt) {
- return Extended ? Waitcnt(0, 0, 0, 0, 0, 0, 0)
- : Waitcnt(0, 0, 0, HasStorecnt ? 0 : ~0u);
- }
-
- static Waitcnt allZeroExceptVsCnt(bool Extended) {
- return Extended ? Waitcnt(0, 0, 0, ~0u, 0, 0, 0) : Waitcnt(0, 0, 0, ~0u);
- }
-
bool hasWait() const { return StoreCnt != ~0u || hasWaitExceptStoreCnt(); }
bool hasWaitExceptStoreCnt() const {
@@ -1030,6 +1050,17 @@ unsigned encodeStorecntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);
namespace Hwreg {
+using HwregId = EncodingField<5, 0>;
+using HwregOffset = EncodingField<10, 6>;
+
+struct HwregSize : EncodingField<15, 11, 32> {
+ using EncodingField::EncodingField;
+ constexpr uint64_t encode() const { return Value - 1; }
+ static ValueType decode(uint64_t Encoded) { return Encoded + 1; }
+};
+
+using HwregEncoding = EncodingFields<HwregId, HwregOffset, HwregSize>;
+
LLVM_READONLY
int64_t getHwregId(const StringRef Name, const MCSubtargetInfo &STI);
@@ -1043,13 +1074,8 @@ LLVM_READNONE
bool isValidHwregWidth(int64_t Width);
LLVM_READNONE
-uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width);
-
-LLVM_READNONE
StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI);
-void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width);
-
} // namespace Hwreg
namespace DepCtr {
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 99f8e8e..f5424cf 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -380,9 +380,9 @@ class VOP_MOVREL<RegisterOperand Src1RC> : VOPProfile<[untyped, i32, untyped, un
let OutsDPP = (outs Src0RC32:$vdst);
let InsDPP16 = (ins Src0RC32:$old, Src0RC32:$src0,
dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask,
- DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl, DppFI:$fi);
+ DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl, Dpp16FI:$fi);
let AsmDPP16 = getAsmDPP16<1, 1, 0>.ret;
- let InsDPP8 = (ins Src0RC32:$old, Src0RC32:$src0, dpp8:$dpp8, DppFI:$fi);
+ let InsDPP8 = (ins Src0RC32:$old, Src0RC32:$src0, dpp8:$dpp8, Dpp8FI:$fi);
let AsmDPP8 = getAsmDPP8<1, 1, 0>.ret;
let OutsVOP3DPP = (outs Src0RC64:$vdst);
@@ -749,7 +749,7 @@ class VOP1_DPP16<bits<8> op, VOP1_DPP_Pseudo ps, int subtarget, VOPProfile p = p
class VOP1_DPP16_Gen<bits<8> op, VOP1_DPP_Pseudo ps, GFXGen Gen, VOPProfile p = ps.Pfl> :
VOP1_DPP16 <op, ps, Gen.Subtarget, p> {
let AssemblerPredicate = Gen.AssemblerPredicate;
- let DecoderNamespace = "DPP"#Gen.DecoderNamespace;
+ let DecoderNamespace = Gen.DecoderNamespace;
}
class VOP1_DPP8<bits<8> op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl> :
@@ -770,7 +770,7 @@ class VOP1_DPP8<bits<8> op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl> :
class VOP1_DPP8_Gen<bits<8> op, VOP1_Pseudo ps, GFXGen Gen, VOPProfile p = ps.Pfl> :
VOP1_DPP8<op, ps, p> {
let AssemblerPredicate = Gen.AssemblerPredicate;
- let DecoderNamespace = "DPP8"#Gen.DecoderNamespace;
+ let DecoderNamespace = Gen.DecoderNamespace;
}
//===----------------------------------------------------------------------===//
@@ -816,7 +816,7 @@ multiclass VOP1_Real_dpp_with_name<GFXGen Gen, bits<9> op, string opName,
string asmName> {
defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
let AsmString = asmName # ps.Pfl.AsmDPP16,
- DecoderNamespace = "DPP" # Gen.DecoderNamespace #
+ DecoderNamespace = Gen.DecoderNamespace #
!if(ps.Pfl.IsRealTrue16, "", "_FAKE16") in {
defm NAME : VOP1_Real_dpp<Gen, op, opName>;
}
@@ -831,7 +831,7 @@ multiclass VOP1_Real_dpp8_with_name<GFXGen Gen, bits<9> op, string opName,
string asmName> {
defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
let AsmString = asmName # ps.Pfl.AsmDPP8,
- DecoderNamespace = "DPP8" # Gen.DecoderNamespace #
+ DecoderNamespace = Gen.DecoderNamespace #
!if(ps.Pfl.IsRealTrue16, "", "_FAKE16") in {
defm NAME : VOP1_Real_dpp8<Gen, op, opName>;
}
@@ -994,9 +994,7 @@ let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
}
multiclass VOP1_Real_dpp8_gfx10<bits<9> op> {
if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP then
- def _dpp8_gfx10 : VOP1_DPP8<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32")> {
- let DecoderNamespace = "DPP8";
- }
+ def _dpp8_gfx10 : VOP1_DPP8<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32")>;
}
} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10"
@@ -1192,16 +1190,14 @@ class VOP1_DPPe <bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile P = ps.Pfl> :
let Inst{31-25} = 0x3f; //encoding
}
-multiclass VOP1Only_Real_vi <bits<10> op> {
- let AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8" in {
+let AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8" in {
+ multiclass VOP1Only_Real_vi <bits<10> op> {
def _vi :
VOP1_Real<!cast<VOP1_Pseudo>(NAME), SIEncodingFamily.VI>,
VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME).Pfl>;
}
-}
-multiclass VOP1_Real_e32e64_vi <bits<10> op> {
- let AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8" in {
+ multiclass VOP1_Real_e32e64_vi <bits<10> op> {
def _e32_vi :
VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.VI>,
VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
@@ -1389,44 +1385,41 @@ def : GCNPat <
// GFX9
//===----------------------------------------------------------------------===//
-multiclass VOP1_Real_gfx9 <bits<10> op> {
- let AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9" in {
+let AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9" in {
+ multiclass VOP1_Real_gfx9 <bits<10> op> {
defm NAME : VOP1_Real_e32e64_vi <op>;
- }
-
- if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then
- def _sdwa_gfx9 :
- VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
- VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
-
- if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then
- def _dpp_gfx9 :
- VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>,
- VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>;
-
-}
-multiclass VOP1_Real_NoDstSel_SDWA_gfx9 <bits<10> op> {
- let AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9" in {
- defm NAME : VOP1_Real_e32e64_vi <op>;
+ if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then
+ def _sdwa_gfx9 :
+ VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
+ VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
+
+ if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then
+ def _dpp_gfx9 :
+ VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>,
+ VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>;
}
- if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then
- def _sdwa_gfx9 :
- VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
- VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
- let Inst{42-40} = 6;
- }
+ multiclass VOP1_Real_NoDstSel_SDWA_gfx9 <bits<10> op> {
+ defm NAME : VOP1_Real_e32e64_vi <op>;
- if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then
- def _dpp_gfx9 :
- VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>,
- VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>;
+ if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then
+ def _sdwa_gfx9 :
+ VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
+ VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
+ let Inst{42-40} = 6;
+ }
+
+ if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then
+ def _dpp_gfx9 :
+ VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>,
+ VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>;
+ }
}
defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>;
-let AssemblerPredicate = isGFX940Plus, DecoderNamespace = "GFX9" in
+let AssemblerPredicate = isGFX940Plus in
defm V_MOV_B64 : VOP1_Real_gfx9 <0x38>;
let OtherPredicates = [HasFP8ConversionInsts] in {
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 4437d5f..13fe79b 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -430,7 +430,7 @@ class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, v
getVregSrcForVT<Src2VT>.ret:$src2, // stub argument
dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask,
DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl);
- let InsDPP16 = !con(InsDPP, (ins DppFI:$fi));
+ let InsDPP16 = !con(InsDPP, (ins Dpp16FI:$fi));
let InsVOP3Base = getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP, RegisterOperand<VGPR_32>, 3,
0, HasModifiers, HasModifiers, HasOMod,
Src0ModVOP3DPP, Src1ModVOP3DPP, Src2Mod, HasOpSel>.ret;
@@ -447,7 +447,7 @@ class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, v
let InsDPP8 = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
getVregSrcForVT<Src2VT>.ret:$src2, // stub argument
- dpp8:$dpp8, DppFI:$fi);
+ dpp8:$dpp8, Dpp8FI:$fi);
let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
getVregSrcForVT<Src2VT>.ret:$src2, // stub argument
@@ -500,7 +500,7 @@ def VOP_MAC_F16_t16 : VOP_MAC <f16> {
let InsDPP8 = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret:$src2, // stub argument
- dpp8:$dpp8, DppFI:$fi);
+ dpp8:$dpp8, Dpp8FI:$fi);
let Src2Mod = FP32InputMods; // dummy unused modifiers
let Src2RC64 = VGPRSrc_32; // stub argument
}
@@ -552,11 +552,11 @@ def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped], /*EnableClamp=*/
Src1DPP:$src1,
dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask,
DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl);
- let InsDPP16 = !con(InsDPP, (ins DppFI:$fi));
+ let InsDPP16 = !con(InsDPP, (ins Dpp16FI:$fi));
let InsDPP8 = (ins DstRCDPP:$old,
Src0DPP:$src0,
Src1DPP:$src1,
- dpp8:$dpp8, DppFI:$fi);
+ dpp8:$dpp8, Dpp8FI:$fi);
let Outs32 = (outs DstRC:$vdst);
let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
let OutsVOP3DPP = Outs64;
@@ -594,11 +594,11 @@ def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], /*EnableClamp=*/1>
Src1DPP:$src1,
dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask,
DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl);
- let InsDPP16 = !con(InsDPP, (ins DppFI:$fi));
+ let InsDPP16 = !con(InsDPP, (ins Dpp16FI:$fi));
let InsDPP8 = (ins DstRCDPP:$old,
Src0DPP:$src0,
Src1DPP:$src1,
- dpp8:$dpp8, DppFI:$fi);
+ dpp8:$dpp8, Dpp8FI:$fi);
let HasExt = 1;
let HasExtDPP = 1;
@@ -645,11 +645,11 @@ class VOP2e_SGPR<list<ValueType> ArgVT> : VOPProfile<ArgVT> {
FPVRegInputMods:$src1_modifiers, Src1DPP:$src1,
dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask,
DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl);
- let InsDPP16 = !con(InsDPP, (ins DppFI:$fi));
+ let InsDPP16 = !con(InsDPP, (ins Dpp16FI:$fi));
let InsDPP8 = (ins DstRCDPP:$old,
FPVRegInputMods:$src0_modifiers, Src0DPP:$src0,
FPVRegInputMods:$src1_modifiers, Src1DPP:$src1,
- dpp8:$dpp8, DppFI:$fi);
+ dpp8:$dpp8, Dpp8FI:$fi);
let Src0ModVOP3DPP = FPVRegInputMods;
let Src1ModVOP3DPP = FPVRegInputMods;
@@ -1273,7 +1273,7 @@ class VOP2_DPP16_Gen<bits<6> op, VOP2_DPP_Pseudo ps, GFXGen Gen,
VOP2_DPP16<op, ps, Gen.Subtarget, opName, p> {
let AssemblerPredicate = Gen.AssemblerPredicate;
let OtherPredicates = !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts], []);
- let DecoderNamespace = "DPP"#Gen.DecoderNamespace#
+ let DecoderNamespace = Gen.DecoderNamespace#
!if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
}
@@ -1302,7 +1302,7 @@ class VOP2_DPP8_Gen<bits<6> op, VOP2_Pseudo ps, GFXGen Gen,
VOP2_DPP8<op, ps, p> {
let AssemblerPredicate = Gen.AssemblerPredicate;
let OtherPredicates = !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts], []);
- let DecoderNamespace = "DPP8"#Gen.DecoderNamespace#
+ let DecoderNamespace = Gen.DecoderNamespace#
!if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
}
@@ -1748,9 +1748,7 @@ let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
}
multiclass VOP2_Real_dpp8_gfx10<bits<6> op> {
if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP then
- def _dpp8_gfx10 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(NAME#"_e32")> {
- let DecoderNamespace = "DPP8";
- }
+ def _dpp8_gfx10 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(NAME#"_e32")>;
}
//===------------------------- VOP2 (with name) -------------------------===//
@@ -1797,7 +1795,6 @@ let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
def _dpp8_gfx10 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32");
let AsmString = asmName # ps.Pfl.AsmDPP8;
- let DecoderNamespace = "DPP8";
}
}
@@ -1876,7 +1873,6 @@ let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
let AsmString = asmName # !subst(", vcc", "", AsmDPP8);
- let DecoderNamespace = "DPP8";
}
if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP then
def _dpp8_w32_gfx10 :
@@ -2231,7 +2227,7 @@ multiclass VOP2_SDWA9_Real <bits<6> op> {
VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
}
-let AssemblerPredicate = isGFX8Only in {
+let AssemblerPredicate = isGFX8Only, DecoderNamespace = "GFX8" in {
multiclass VOP2be_Real_e32e64_vi_only <bits<6> op, string OpName, string AsmName> {
def _e32_vi :
@@ -2239,14 +2235,12 @@ multiclass VOP2be_Real_e32e64_vi_only <bits<6> op, string OpName, string AsmName
VOP2e<op{5-0}, !cast<VOP2_Pseudo>(OpName#"_e32").Pfl> {
VOP2_Pseudo ps = !cast<VOP2_Pseudo>(OpName#"_e32");
let AsmString = AsmName # ps.AsmOperands;
- let DecoderNamespace = "GFX8";
}
def _e64_vi :
VOP3_Real<!cast<VOP3_Pseudo>(OpName#"_e64"), SIEncodingFamily.VI>,
VOP3be_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(OpName#"_e64").Pfl> {
VOP3_Pseudo ps = !cast<VOP3_Pseudo>(OpName#"_e64");
let AsmString = AsmName # ps.AsmOperands;
- let DecoderNamespace = "GFX8";
}
if !cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtSDWA then
def _sdwa_vi :
@@ -2263,9 +2257,10 @@ multiclass VOP2be_Real_e32e64_vi_only <bits<6> op, string OpName, string AsmName
let AsmString = AsmName # ps.AsmOperands;
}
}
-}
-let AssemblerPredicate = isGFX9Only in {
+} // End AssemblerPredicate = isGFX8Only, DecoderNamespace = "GFX8"
+
+let AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9" in {
multiclass VOP2be_Real_e32e64_gfx9 <bits<6> op, string OpName, string AsmName> {
def _e32_gfx9 :
@@ -2273,14 +2268,12 @@ multiclass VOP2be_Real_e32e64_gfx9 <bits<6> op, string OpName, string AsmName> {
VOP2e<op{5-0}, !cast<VOP2_Pseudo>(OpName#"_e32").Pfl> {
VOP2_Pseudo ps = !cast<VOP2_Pseudo>(OpName#"_e32");
let AsmString = AsmName # ps.AsmOperands;
- let DecoderNamespace = "GFX9";
}
def _e64_gfx9 :
VOP3_Real<!cast<VOP3_Pseudo>(OpName#"_e64"), SIEncodingFamily.GFX9>,
VOP3be_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(OpName#"_e64").Pfl> {
VOP3_Pseudo ps = !cast<VOP3_Pseudo>(OpName#"_e64");
let AsmString = AsmName # ps.AsmOperands;
- let DecoderNamespace = "GFX9";
}
if !cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtSDWA9 then
def _sdwa_gfx9 :
@@ -2295,21 +2288,16 @@ multiclass VOP2be_Real_e32e64_gfx9 <bits<6> op, string OpName, string AsmName> {
VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(OpName#"_dpp")> {
VOP2_DPP_Pseudo ps = !cast<VOP2_DPP_Pseudo>(OpName#"_dpp");
let AsmString = AsmName # ps.AsmOperands;
- let DecoderNamespace = "GFX9";
}
}
multiclass VOP2_Real_e32e64_gfx9 <bits<6> op> {
def _e32_gfx9 :
VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX9>,
- VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>{
- let DecoderNamespace = "GFX9";
- }
+ VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>;
def _e64_gfx9 :
VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX9>,
- VOP3e_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
- let DecoderNamespace = "GFX9";
- }
+ VOP3e_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then
def _sdwa_gfx9 :
VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
@@ -2318,12 +2306,10 @@ multiclass VOP2_Real_e32e64_gfx9 <bits<6> op> {
if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then
def _dpp_gfx9 :
VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>,
- VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")> {
- let DecoderNamespace = "GFX9";
- }
+ VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")>;
}
-} // AssemblerPredicate = isGFX9Only
+} // End AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9"
multiclass VOP2_Real_e32e64_vi <bits<6> op> :
Base_VOP2_Real_e32e64_vi<op>, VOP2_SDWA_Real<op>, VOP2_SDWA9_Real<op> {
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 396ae9c..7198a40 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -532,11 +532,11 @@ def VOP3_CVT_PK_F8_F32_Profile : VOP3_Profile<VOP_I32_F32_F32, VOP3_OPSEL> {
FP32InputMods:$src1_modifiers, Src1VOP3DPP:$src1,
VGPR_32:$vdst_in, op_sel0:$op_sel,
dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask,
- DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl, DppFI:$fi);
+ DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl, Dpp16FI:$fi);
let InsVOP3DPP8 = (ins VGPR_32:$old,
FP32InputMods:$src0_modifiers, Src0VOP3DPP:$src0,
FP32InputMods:$src1_modifiers, Src1VOP3DPP:$src1,
- VGPR_32:$vdst_in, op_sel0:$op_sel, dpp8:$dpp8, DppFI:$fi);
+ VGPR_32:$vdst_in, op_sel0:$op_sel, dpp8:$dpp8, Dpp8FI:$fi);
let HasClamp = 0;
let HasExtVOP3DPP = 1;
@@ -553,12 +553,12 @@ def VOP3_CVT_SR_F8_F32_Profile : VOP3_Profile<VOPProfile<[i32, f32, i32, f32]>,
FP32InputMods:$src1_modifiers, Src1VOP3DPP:$src1,
FP32InputMods:$src2_modifiers, VGPR_32:$src2,
op_sel0:$op_sel, dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask,
- DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl, DppFI:$fi);
+ DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl, Dpp16FI:$fi);
let InsVOP3DPP8 = (ins VGPR_32:$old,
FP32InputMods:$src0_modifiers, Src0VOP3DPP:$src0,
FP32InputMods:$src1_modifiers, Src1VOP3DPP:$src1,
FP32InputMods:$src2_modifiers, VGPR_32:$src2,
- op_sel0:$op_sel, dpp8:$dpp8, DppFI:$fi);
+ op_sel0:$op_sel, dpp8:$dpp8, Dpp8FI:$fi);
let HasClamp = 0;
let HasSrc2 = 0;
let HasSrc2Mods = 1;
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 74f451b..ac3c8f9 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -461,13 +461,13 @@ def VOP3P_DOTF8_Profile : VOP3P_Profile<VOPProfile <[f32, i32, i32, f32]>,
let InsVOP3DPP8 = (ins DstRC:$old, VGPR_32:$src0, VRegSrc_32:$src1,
PackedF16InputMods:$src2_modifiers, VRegSrc_32:$src2,
- neg_lo0:$neg_lo, neg_hi0:$neg_hi, dpp8:$dpp8, DppFI:$fi);
+ neg_lo0:$neg_lo, neg_hi0:$neg_hi, dpp8:$dpp8, Dpp8FI:$fi);
let InsVOP3DPP16 = (ins DstRC:$old, VGPR_32:$src0, VRegSrc_32:$src1,
PackedF16InputMods:$src2_modifiers, VRegSrc_32:$src2,
neg_lo0:$neg_lo, neg_hi0:$neg_hi, dpp_ctrl:$dpp_ctrl,
DppRowMask:$row_mask, DppBankMask:$bank_mask,
- DppBoundCtrl:$bound_ctrl, DppFI:$fi);
+ DppBoundCtrl:$bound_ctrl, Dpp16FI:$fi);
}
multiclass VOP3PDOTF8Inst <string OpName, SDPatternOperator intrinsic_node> {
@@ -1353,6 +1353,7 @@ class VOP3P_DPP16<bits<7> op, VOP_DPP_Pseudo ps, int subtarget,
let AssemblerPredicate = HasDPP16;
let SubtargetPredicate = HasDPP16;
let OtherPredicates = ps.OtherPredicates;
+ let IsPacked = ps.IsPacked;
}
class VOP3P_DPP8_Base<bits<7> op, VOP_Pseudo ps, string opName = ps.OpName>
@@ -1362,6 +1363,7 @@ class VOP3P_DPP8_Base<bits<7> op, VOP_Pseudo ps, string opName = ps.OpName>
let SchedRW = ps.SchedRW;
let Uses = ps.Uses;
let OtherPredicates = ps.OtherPredicates;
+ let IsPacked = ps.IsPacked;
}
//===----------------------------------------------------------------------===//
@@ -1486,7 +1488,7 @@ multiclass VOP3P_Real_dpp<GFXGen Gen, bits<7> op, string backing_ps_name = NAME,
: VOP3P_DPP16<op, !cast<VOP_DPP_Pseudo>(backing_ps_name #"_dpp"),
Gen.Subtarget> {
let AsmString = asmName #ps.Pfl.AsmVOP3DPP16;
- let DecoderNamespace = "DPP"#Gen.DecoderNamespace;
+ let DecoderNamespace = Gen.DecoderNamespace;
let AssemblerPredicate = Gen.AssemblerPredicate;
}
}
@@ -1496,7 +1498,7 @@ multiclass VOP3P_Real_dpp8<GFXGen Gen, bits<7> op, string backing_ps_name = NAME
defvar ps = !cast<VOP3P_Pseudo>(backing_ps_name);
def _dpp8#Gen.Suffix : VOP3P_DPP8_Base<op, ps> {
let AsmString = asmName #ps.Pfl.AsmVOP3DPP8;
- let DecoderNamespace = "DPP8"#Gen.DecoderNamespace;
+ let DecoderNamespace = Gen.DecoderNamespace;
let AssemblerPredicate = Gen.AssemblerPredicate;
}
}
@@ -1613,7 +1615,7 @@ multiclass VOP3P_Real_MFMA_gfx940_aliases<string NameFrom, string NameTo, string
multiclass VOP3P_Real_MFMA_gfx940<bits<7> op, string Name = !cast<VOP3_Pseudo>(NAME#"_e64").Mnemonic,
VOP3_Pseudo PS_ACD = !cast<VOP3_Pseudo>(NAME # "_e64"),
VOP3_Pseudo PS_VCD = !cast<VOP3_Pseudo>(NAME # "_vgprcd" # "_e64")> {
- let SubtargetPredicate = isGFX940Plus,
+ let AssemblerPredicate = isGFX940Plus,
DecoderNamespace = "GFX940",
AsmString = Name # PS_ACD.AsmOperands, Constraints = "" in {
def _gfx940_acd : VOP3P_Real<PS_ACD, SIEncodingFamily.GFX940>,
diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
index fe52a0e..e5e8244 100644
--- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -222,6 +222,8 @@ class VOPCInstAlias <VOP3_Pseudo ps, Instruction inst,
let AsmVariantName = AMDGPUAsmVariants.Default;
let SubtargetPredicate = AssemblerPredicate;
+
+ string DecoderNamespace; // dummy
}
multiclass VOPCInstAliases <string old_name, string Arch, string real_name = old_name, string mnemonic_from = real_name> {
@@ -766,7 +768,7 @@ class VOPC_Class_Profile<list<SchedReadWrite> sched, ValueType src0VT, ValueType
let AsmDPP = "$src0_modifiers, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
let AsmDPP16 = AsmDPP#"$fi";
let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0, Src1DPP:$src1, dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask, DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl);
- let InsDPP16 = !con(InsDPP, (ins DppFI:$fi));
+ let InsDPP16 = !con(InsDPP, (ins Dpp16FI:$fi));
// DPP8 forbids modifiers and can inherit from VOPC_Profile
let Ins64 = (ins Src0Mod:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1);
@@ -1331,196 +1333,176 @@ class VOPC64_DPP8_NoDst<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
//===----------------------------------------------------------------------===//
multiclass VOPC_Real_Base<GFXGen Gen, bits<9> op> {
- let AssemblerPredicate = Gen.AssemblerPredicate in {
+ let AssemblerPredicate = Gen.AssemblerPredicate, DecoderNamespace = Gen.DecoderNamespace in {
defvar ps32 = !cast<VOPC_Pseudo>(NAME#"_e32");
defvar ps64 = !cast<VOP3_Pseudo>(NAME#"_e64");
- let DecoderNamespace = Gen.DecoderNamespace in {
- def _e32#Gen.Suffix : VOPC_Real<ps32, Gen.Subtarget>,
- VOPCe<op{7-0}>;
- def _e64#Gen.Suffix : VOP3_Real<ps64, Gen.Subtarget>,
- VOP3a_gfx11_gfx12<{0, op}, ps64.Pfl> {
- // Encoding used for VOPC instructions encoded as VOP3 differs from
- // VOP3e by destination name (sdst) as VOPC doesn't have vector dst.
- bits<8> sdst;
- let Inst{7-0} = sdst;
- }
- } // End DecoderNamespace = Gen.DecoderNamespace
+ def _e32#Gen.Suffix : VOPC_Real<ps32, Gen.Subtarget>,
+ VOPCe<op{7-0}>;
+ def _e64#Gen.Suffix : VOP3_Real<ps64, Gen.Subtarget>,
+ VOP3a_gfx11_gfx12<{0, op}, ps64.Pfl> {
+ // Encoding used for VOPC instructions encoded as VOP3 differs from
+ // VOP3e by destination name (sdst) as VOPC doesn't have vector dst.
+ bits<8> sdst;
+ let Inst{7-0} = sdst;
+ }
defm : VOPCInstAliases<NAME, !substr(Gen.Suffix,1)>;
if ps32.Pfl.HasExtDPP then {
defvar psDPP = !cast<VOP_DPP_Pseudo>(NAME #"_e32" #"_dpp");
defvar AsmDPP = ps32.Pfl.AsmDPP16;
- let DecoderNamespace = "DPP"#Gen.DecoderNamespace in {
- def _e32_dpp#Gen.Suffix : VOPC_DPP16_SIMC<op{7-0}, psDPP, Gen.Subtarget>;
- def _e32_dpp_w32#Gen.Suffix : VOPC_DPP16<op{7-0}, psDPP> {
- let AsmString = psDPP.OpName # " vcc_lo, " # AsmDPP;
- let isAsmParserOnly = 1;
- let WaveSizePredicate = isWave32;
- }
- def _e32_dpp_w64#Gen.Suffix : VOPC_DPP16<op{7-0}, psDPP> {
- let AsmString = psDPP.OpName # " vcc, " # AsmDPP;
- let isAsmParserOnly = 1;
- let WaveSizePredicate = isWave64;
- }
+ def _e32_dpp#Gen.Suffix : VOPC_DPP16_SIMC<op{7-0}, psDPP, Gen.Subtarget>;
+ def _e32_dpp_w32#Gen.Suffix : VOPC_DPP16<op{7-0}, psDPP> {
+ let AsmString = psDPP.OpName # " vcc_lo, " # AsmDPP;
+ let isAsmParserOnly = 1;
+ let WaveSizePredicate = isWave32;
+ }
+ def _e32_dpp_w64#Gen.Suffix : VOPC_DPP16<op{7-0}, psDPP> {
+ let AsmString = psDPP.OpName # " vcc, " # AsmDPP;
+ let isAsmParserOnly = 1;
+ let WaveSizePredicate = isWave64;
}
defvar AsmDPP8 = ps32.Pfl.AsmDPP8;
- let DecoderNamespace = "DPP8"#Gen.DecoderNamespace in {
- def _e32_dpp8#Gen.Suffix : VOPC_DPP8<op{7-0}, ps32>;
- def _e32_dpp8_w32#Gen.Suffix : VOPC_DPP8<op{7-0}, ps32> {
- let AsmString = ps32.OpName # " vcc_lo, " # AsmDPP8;
- let isAsmParserOnly = 1;
- let WaveSizePredicate = isWave32;
- }
- def _e32_dpp8_w64#Gen.Suffix : VOPC_DPP8<op{7-0}, ps32> {
- let AsmString = ps32.OpName # " vcc, " # AsmDPP8;
- let isAsmParserOnly = 1;
- let WaveSizePredicate = isWave64;
- }
+ def _e32_dpp8#Gen.Suffix : VOPC_DPP8<op{7-0}, ps32>;
+ def _e32_dpp8_w32#Gen.Suffix : VOPC_DPP8<op{7-0}, ps32> {
+ let AsmString = ps32.OpName # " vcc_lo, " # AsmDPP8;
+ let isAsmParserOnly = 1;
+ let WaveSizePredicate = isWave32;
+ }
+ def _e32_dpp8_w64#Gen.Suffix : VOPC_DPP8<op{7-0}, ps32> {
+ let AsmString = ps32.OpName # " vcc, " # AsmDPP8;
+ let isAsmParserOnly = 1;
+ let WaveSizePredicate = isWave64;
}
}
if ps64.Pfl.HasExtVOP3DPP then {
defvar psDPP = !cast<VOP_DPP_Pseudo>(NAME #"_e64" #"_dpp");
defvar AsmDPP = ps64.Pfl.AsmVOP3DPP16;
- let DecoderNamespace = "DPP"#Gen.DecoderNamespace in {
- def _e64_dpp#Gen.Suffix : VOPC64_DPP16_Dst<{0, op}, psDPP>,
- SIMCInstr<psDPP.PseudoInstr, Gen.Subtarget>;
- def _e64_dpp_w32#Gen.Suffix : VOPC64_DPP16_Dst<{0, op}, psDPP> {
- let AsmString = psDPP.OpName # " vcc_lo, " # AsmDPP;
- let isAsmParserOnly = 1;
- let WaveSizePredicate = isWave32;
- }
- def _e64_dpp_w64#Gen.Suffix : VOPC64_DPP16_Dst<{0, op}, psDPP> {
- let AsmString = psDPP.OpName # " vcc, " # AsmDPP;
- let isAsmParserOnly = 1;
- let WaveSizePredicate = isWave64;
- }
+ def _e64_dpp#Gen.Suffix : VOPC64_DPP16_Dst<{0, op}, psDPP>,
+ SIMCInstr<psDPP.PseudoInstr, Gen.Subtarget>;
+ def _e64_dpp_w32#Gen.Suffix : VOPC64_DPP16_Dst<{0, op}, psDPP> {
+ let AsmString = psDPP.OpName # " vcc_lo, " # AsmDPP;
+ let isAsmParserOnly = 1;
+ let WaveSizePredicate = isWave32;
+ }
+ def _e64_dpp_w64#Gen.Suffix : VOPC64_DPP16_Dst<{0, op}, psDPP> {
+ let AsmString = psDPP.OpName # " vcc, " # AsmDPP;
+ let isAsmParserOnly = 1;
+ let WaveSizePredicate = isWave64;
}
defvar AsmDPP8 = ps64.Pfl.AsmVOP3DPP8;
- let DecoderNamespace = "DPP8"#Gen.DecoderNamespace in {
- def _e64_dpp8#Gen.Suffix : VOPC64_DPP8_Dst<{0, op}, ps64>;
- def _e64_dpp8_w32#Gen.Suffix : VOPC64_DPP8_Dst<{0, op}, ps64> {
- let AsmString = ps32.OpName # " vcc_lo, " # AsmDPP8;
- let isAsmParserOnly = 1;
- let WaveSizePredicate = isWave32;
- }
- def _e64_dpp8_w64#Gen.Suffix : VOPC64_DPP8_Dst<{0, op}, ps64> {
- let AsmString = ps32.OpName # " vcc, " # AsmDPP8;
- let isAsmParserOnly = 1;
- let WaveSizePredicate = isWave64;
- }
+ def _e64_dpp8#Gen.Suffix : VOPC64_DPP8_Dst<{0, op}, ps64>;
+ def _e64_dpp8_w32#Gen.Suffix : VOPC64_DPP8_Dst<{0, op}, ps64> {
+ let AsmString = ps32.OpName # " vcc_lo, " # AsmDPP8;
+ let isAsmParserOnly = 1;
+ let WaveSizePredicate = isWave32;
+ }
+ def _e64_dpp8_w64#Gen.Suffix : VOPC64_DPP8_Dst<{0, op}, ps64> {
+ let AsmString = ps32.OpName # " vcc, " # AsmDPP8;
+ let isAsmParserOnly = 1;
+ let WaveSizePredicate = isWave64;
}
}
- } // AssemblerPredicate = Gen.AssemblerPredicate
+ } // AssemblerPredicate = Gen.AssemblerPredicate, DecoderNamespace = Gen.DecoderNamespace
}
multiclass VOPC_Real_with_name<GFXGen Gen, bits<9> op, string OpName,
string asm_name, string pseudo_mnemonic = ""> {
- let AssemblerPredicate = Gen.AssemblerPredicate in {
+ let AssemblerPredicate = Gen.AssemblerPredicate, DecoderNamespace = Gen.DecoderNamespace in {
defvar ps32 = !cast<VOPC_Pseudo>(OpName#"_e32");
defvar ps64 = !cast<VOP3_Pseudo>(OpName#"_e64");
- let DecoderNamespace = Gen.DecoderNamespace in {
- def _e32#Gen.Suffix :
- // 32 and 64 bit forms of the instruction have _e32 and _e64
- // respectively appended to their assembly mnemonic.
- // _e64 is printed as part of the VOPDstS64orS32 operand, whereas
- // the destination-less 32bit forms add it to the asmString here.
- VOPC_Real<ps32, Gen.Subtarget, asm_name#"_e32">,
- VOPCe<op{7-0}>,
- MnemonicAlias<!if(!empty(pseudo_mnemonic), ps32.Mnemonic,
- pseudo_mnemonic),
- asm_name, ps32.AsmVariantName>,
- Requires<[Gen.AssemblerPredicate]>;
- def _e64#Gen.Suffix :
- VOP3_Real<ps64, Gen.Subtarget, asm_name>,
- VOP3a_gfx11_gfx12<{0, op}, ps64.Pfl>,
- MnemonicAlias<!if(!empty(pseudo_mnemonic), ps64.Mnemonic,
- pseudo_mnemonic),
- asm_name, ps64.AsmVariantName>,
- Requires<[Gen.AssemblerPredicate]> {
- // Encoding used for VOPC instructions encoded as VOP3 differs from
- // VOP3e by destination name (sdst) as VOPC doesn't have vector dst.
- bits<8> sdst;
- let Inst{7-0} = sdst;
- }
- } // End DecoderNamespace = Gen.DecoderNamespace
+ def _e32#Gen.Suffix :
+ // 32 and 64 bit forms of the instruction have _e32 and _e64
+ // respectively appended to their assembly mnemonic.
+ // _e64 is printed as part of the VOPDstS64orS32 operand, whereas
+ // the destination-less 32bit forms add it to the asmString here.
+ VOPC_Real<ps32, Gen.Subtarget, asm_name#"_e32">,
+ VOPCe<op{7-0}>,
+ MnemonicAlias<!if(!empty(pseudo_mnemonic), ps32.Mnemonic,
+ pseudo_mnemonic),
+ asm_name, ps32.AsmVariantName>,
+ Requires<[Gen.AssemblerPredicate]>;
+ def _e64#Gen.Suffix :
+ VOP3_Real<ps64, Gen.Subtarget, asm_name>,
+ VOP3a_gfx11_gfx12<{0, op}, ps64.Pfl>,
+ MnemonicAlias<!if(!empty(pseudo_mnemonic), ps64.Mnemonic,
+ pseudo_mnemonic),
+ asm_name, ps64.AsmVariantName>,
+ Requires<[Gen.AssemblerPredicate]> {
+ // Encoding used for VOPC instructions encoded as VOP3 differs from
+ // VOP3e by destination name (sdst) as VOPC doesn't have vector dst.
+ bits<8> sdst;
+ let Inst{7-0} = sdst;
+ }
defm : VOPCInstAliases<OpName, !substr(Gen.Suffix, 1), NAME, asm_name>;
if ps32.Pfl.HasExtDPP then {
defvar psDPP = !cast<VOP_DPP_Pseudo>(OpName #"_e32" #"_dpp");
defvar AsmDPP = ps32.Pfl.AsmDPP16;
- let DecoderNamespace = "DPP"#Gen.DecoderNamespace in {
- def _e32_dpp#Gen.Suffix : VOPC_DPP16_SIMC<op{7-0}, psDPP,
- Gen.Subtarget, asm_name>;
- def _e32_dpp_w32#Gen.Suffix
- : VOPC_DPP16<op{7-0}, psDPP, asm_name> {
- let AsmString = asm_name # " vcc_lo, " # AsmDPP;
- let isAsmParserOnly = 1;
- let WaveSizePredicate = isWave32;
- }
- def _e32_dpp_w64#Gen.Suffix
- : VOPC_DPP16<op{7-0}, psDPP, asm_name> {
- let AsmString = asm_name # " vcc, " # AsmDPP;
- let isAsmParserOnly = 1;
- let WaveSizePredicate = isWave64;
- }
+ def _e32_dpp#Gen.Suffix : VOPC_DPP16_SIMC<op{7-0}, psDPP,
+ Gen.Subtarget, asm_name>;
+ def _e32_dpp_w32#Gen.Suffix
+ : VOPC_DPP16<op{7-0}, psDPP, asm_name> {
+ let AsmString = asm_name # " vcc_lo, " # AsmDPP;
+ let isAsmParserOnly = 1;
+ let WaveSizePredicate = isWave32;
+ }
+ def _e32_dpp_w64#Gen.Suffix
+ : VOPC_DPP16<op{7-0}, psDPP, asm_name> {
+ let AsmString = asm_name # " vcc, " # AsmDPP;
+ let isAsmParserOnly = 1;
+ let WaveSizePredicate = isWave64;
}
defvar AsmDPP8 = ps32.Pfl.AsmDPP8;
- let DecoderNamespace = "DPP8"#Gen.DecoderNamespace in {
- def _e32_dpp8#Gen.Suffix : VOPC_DPP8<op{7-0}, ps32, asm_name>;
- def _e32_dpp8_w32#Gen.Suffix
- : VOPC_DPP8<op{7-0}, ps32, asm_name> {
- let AsmString = asm_name # " vcc_lo, " # AsmDPP8;
- let isAsmParserOnly = 1;
- let WaveSizePredicate = isWave32;
- }
- def _e32_dpp8_w64#Gen.Suffix
- : VOPC_DPP8<op{7-0}, ps32, asm_name> {
- let AsmString = asm_name # " vcc, " # AsmDPP8;
- let isAsmParserOnly = 1;
- let WaveSizePredicate = isWave64;
- }
+ def _e32_dpp8#Gen.Suffix : VOPC_DPP8<op{7-0}, ps32, asm_name>;
+ def _e32_dpp8_w32#Gen.Suffix
+ : VOPC_DPP8<op{7-0}, ps32, asm_name> {
+ let AsmString = asm_name # " vcc_lo, " # AsmDPP8;
+ let isAsmParserOnly = 1;
+ let WaveSizePredicate = isWave32;
+ }
+ def _e32_dpp8_w64#Gen.Suffix
+ : VOPC_DPP8<op{7-0}, ps32, asm_name> {
+ let AsmString = asm_name # " vcc, " # AsmDPP8;
+ let isAsmParserOnly = 1;
+ let WaveSizePredicate = isWave64;
}
}
if ps64.Pfl.HasExtVOP3DPP then {
defvar psDPP = !cast<VOP_DPP_Pseudo>(OpName #"_e64" #"_dpp");
defvar AsmDPP = ps64.Pfl.AsmVOP3DPP16;
- let DecoderNamespace = "DPP"#Gen.DecoderNamespace in {
- def _e64_dpp#Gen.Suffix : VOPC64_DPP16_Dst<{0, op}, psDPP, asm_name>,
- SIMCInstr<psDPP.PseudoInstr, Gen.Subtarget>;
- def _e64_dpp_w32#Gen.Suffix
- : VOPC64_DPP16_Dst<{0, op}, psDPP, asm_name> {
- let AsmString = asm_name # " vcc_lo, " # AsmDPP;
- let isAsmParserOnly = 1;
- let WaveSizePredicate = isWave32;
- }
- def _e64_dpp_w64#Gen.Suffix
- : VOPC64_DPP16_Dst<{0, op}, psDPP, asm_name> {
- let AsmString = asm_name # " vcc, " # AsmDPP;
- let isAsmParserOnly = 1;
- let WaveSizePredicate = isWave64;
- }
+ def _e64_dpp#Gen.Suffix : VOPC64_DPP16_Dst<{0, op}, psDPP, asm_name>,
+ SIMCInstr<psDPP.PseudoInstr, Gen.Subtarget>;
+ def _e64_dpp_w32#Gen.Suffix
+ : VOPC64_DPP16_Dst<{0, op}, psDPP, asm_name> {
+ let AsmString = asm_name # " vcc_lo, " # AsmDPP;
+ let isAsmParserOnly = 1;
+ let WaveSizePredicate = isWave32;
+ }
+ def _e64_dpp_w64#Gen.Suffix
+ : VOPC64_DPP16_Dst<{0, op}, psDPP, asm_name> {
+ let AsmString = asm_name # " vcc, " # AsmDPP;
+ let isAsmParserOnly = 1;
+ let WaveSizePredicate = isWave64;
}
defvar AsmDPP8 = ps64.Pfl.AsmVOP3DPP8;
- let DecoderNamespace = "DPP8"#Gen.DecoderNamespace in {
- def _e64_dpp8#Gen.Suffix : VOPC64_DPP8_Dst<{0, op}, ps64, asm_name>;
- def _e64_dpp8_w32#Gen.Suffix
- : VOPC64_DPP8_Dst<{0, op}, ps64, asm_name> {
- let AsmString = asm_name # " vcc_lo, " # AsmDPP8;
- let isAsmParserOnly = 1;
- let WaveSizePredicate = isWave32;
- }
- def _e64_dpp8_w64#Gen.Suffix
- : VOPC64_DPP8_Dst<{0, op}, ps64, asm_name> {
- let AsmString = asm_name # " vcc, " # AsmDPP8;
- let isAsmParserOnly = 1;
- let WaveSizePredicate = isWave64;
- }
+ def _e64_dpp8#Gen.Suffix : VOPC64_DPP8_Dst<{0, op}, ps64, asm_name>;
+ def _e64_dpp8_w32#Gen.Suffix
+ : VOPC64_DPP8_Dst<{0, op}, ps64, asm_name> {
+ let AsmString = asm_name # " vcc_lo, " # AsmDPP8;
+ let isAsmParserOnly = 1;
+ let WaveSizePredicate = isWave32;
+ }
+ def _e64_dpp8_w64#Gen.Suffix
+ : VOPC64_DPP8_Dst<{0, op}, ps64, asm_name> {
+ let AsmString = asm_name # " vcc, " # AsmDPP8;
+ let isAsmParserOnly = 1;
+ let WaveSizePredicate = isWave64;
}
}
- } // AssemblerPredicate = Gen.AssemblerPredicate
+ } // End AssemblerPredicate = Gen.AssemblerPredicate, DecoderNamespace = Gen.DecoderNamespace
}
multiclass VOPC_Real_t16<GFXGen Gen, bits<9> op, string asm_name,
@@ -1528,123 +1510,103 @@ multiclass VOPC_Real_t16<GFXGen Gen, bits<9> op, string asm_name,
VOPC_Real_with_name<Gen, op, OpName, asm_name, pseudo_mnemonic>;
multiclass VOPCX_Real<GFXGen Gen, bits<9> op> {
- let AssemblerPredicate = Gen.AssemblerPredicate in {
+ let AssemblerPredicate = Gen.AssemblerPredicate, DecoderNamespace = Gen.DecoderNamespace in {
defvar ps32 = !cast<VOPC_Pseudo>(NAME#"_nosdst_e32");
defvar ps64 = !cast<VOP3_Pseudo>(NAME#"_nosdst_e64");
- let DecoderNamespace = Gen.DecoderNamespace in {
- def _e32#Gen.Suffix :
- VOPC_Real<ps32, Gen.Subtarget>,
- VOPCe<op{7-0}> {
- let AsmString = !subst("_nosdst", "", ps32.PseudoInstr)
- # " " # ps32.AsmOperands;
- }
- def _e64#Gen.Suffix :
- VOP3_Real<ps64, Gen.Subtarget>,
- VOP3a_gfx11_gfx12<{0, op}, ps64.Pfl> {
- let Inst{7-0} = ?; // sdst
- let AsmString = !subst("_nosdst", "", ps64.Mnemonic)
- # "{_e64} " # ps64.AsmOperands;
- }
- } // End DecoderNamespace = Gen.DecoderNamespace
+ def _e32#Gen.Suffix :
+ VOPC_Real<ps32, Gen.Subtarget>,
+ VOPCe<op{7-0}> {
+ let AsmString = !subst("_nosdst", "", ps32.PseudoInstr)
+ # " " # ps32.AsmOperands;
+ }
+ def _e64#Gen.Suffix :
+ VOP3_Real<ps64, Gen.Subtarget>,
+ VOP3a_gfx11_gfx12<{0, op}, ps64.Pfl> {
+ let Inst{7-0} = ?; // sdst
+ let AsmString = !subst("_nosdst", "", ps64.Mnemonic)
+ # "{_e64} " # ps64.AsmOperands;
+ }
defm : VOPCXInstAliases<NAME, !substr(Gen.Suffix, 1)>;
if ps32.Pfl.HasExtDPP then {
defvar psDPP = !cast<VOP_DPP_Pseudo>(NAME #"_nosdst_e32" #"_dpp");
defvar AsmDPP = ps32.Pfl.AsmDPP16;
- let DecoderNamespace = "DPP"#Gen.DecoderNamespace in {
- def _e32_dpp#Gen.Suffix
- : VOPC_DPP16_SIMC<op{7-0}, psDPP, Gen.Subtarget> {
- let AsmString = !subst("_nosdst", "", psDPP.OpName) # " " # AsmDPP;
- }
+ def _e32_dpp#Gen.Suffix
+ : VOPC_DPP16_SIMC<op{7-0}, psDPP, Gen.Subtarget> {
+ let AsmString = !subst("_nosdst", "", psDPP.OpName) # " " # AsmDPP;
}
defvar AsmDPP8 = ps32.Pfl.AsmDPP8;
- let DecoderNamespace = "DPP8"#Gen.DecoderNamespace in {
- def _e32_dpp8#Gen.Suffix : VOPC_DPP8<op{7-0}, ps32> {
- let AsmString = !subst("_nosdst", "", ps32.OpName) # " " # AsmDPP8;
- }
+ def _e32_dpp8#Gen.Suffix : VOPC_DPP8<op{7-0}, ps32> {
+ let AsmString = !subst("_nosdst", "", ps32.OpName) # " " # AsmDPP8;
}
}
if ps64.Pfl.HasExtVOP3DPP then {
defvar psDPP = !cast<VOP_DPP_Pseudo>(NAME #"_nosdst_e64" #"_dpp");
defvar AsmDPP = ps64.Pfl.AsmVOP3DPP16;
- let DecoderNamespace = "DPP"#Gen.DecoderNamespace in {
- def _e64_dpp#Gen.Suffix
- : VOPC64_DPP16_NoDst<{0, op}, psDPP>,
- SIMCInstr<psDPP.PseudoInstr, Gen.Subtarget> {
- let AsmString = !subst("_nosdst", "", psDPP.OpName)
- # "{_e64_dpp} " # AsmDPP;
- }
+ def _e64_dpp#Gen.Suffix
+ : VOPC64_DPP16_NoDst<{0, op}, psDPP>,
+ SIMCInstr<psDPP.PseudoInstr, Gen.Subtarget> {
+ let AsmString = !subst("_nosdst", "", psDPP.OpName)
+ # "{_e64_dpp} " # AsmDPP;
}
defvar AsmDPP8 = ps64.Pfl.AsmVOP3DPP8;
- let DecoderNamespace = "DPP8"#Gen.DecoderNamespace in {
- def _e64_dpp8#Gen.Suffix : VOPC64_DPP8_NoDst<{0, op}, ps64> {
- let AsmString = !subst("_nosdst", "", ps64.OpName)
- # "{_e64_dpp} " # AsmDPP8;
- }
+ def _e64_dpp8#Gen.Suffix : VOPC64_DPP8_NoDst<{0, op}, ps64> {
+ let AsmString = !subst("_nosdst", "", ps64.OpName)
+ # "{_e64_dpp} " # AsmDPP8;
}
}
- } // AssemblerPredicate = Gen.AssemblerPredicate
+ } // End AssemblerPredicate = Gen.AssemblerPredicate, DecoderNamespace = Gen.DecoderNamespace
}
multiclass VOPCX_Real_with_name<GFXGen Gen, bits<9> op, string OpName,
string asm_name, string pseudo_mnemonic = ""> {
- let AssemblerPredicate = Gen.AssemblerPredicate in {
+ let AssemblerPredicate = Gen.AssemblerPredicate, DecoderNamespace = Gen.DecoderNamespace in {
defvar ps32 = !cast<VOPC_Pseudo>(OpName#"_nosdst_e32");
defvar ps64 = !cast<VOP3_Pseudo>(OpName#"_nosdst_e64");
- let DecoderNamespace = Gen.DecoderNamespace in {
- def _e32#Gen.Suffix
- : VOPC_Real<ps32, Gen.Subtarget, asm_name>,
- MnemonicAlias<!if(!empty(pseudo_mnemonic), !subst("_nosdst", "", ps32.Mnemonic),
- pseudo_mnemonic),
- asm_name, ps32.AsmVariantName>,
- Requires<[Gen.AssemblerPredicate]>,
- VOPCe<op{7-0}> {
- let AsmString = asm_name # "{_e32} " # ps32.AsmOperands;
- }
- def _e64#Gen.Suffix
- : VOP3_Real<ps64, Gen.Subtarget, asm_name>,
- MnemonicAlias<!if(!empty(pseudo_mnemonic), !subst("_nosdst", "", ps64.Mnemonic),
- pseudo_mnemonic),
- asm_name, ps64.AsmVariantName>,
- Requires<[Gen.AssemblerPredicate]>,
- VOP3a_gfx11_gfx12<{0, op}, ps64.Pfl> {
- let Inst{7-0} = ? ; // sdst
- let AsmString = asm_name # "{_e64} " # ps64.AsmOperands;
- }
- } // End DecoderNamespace = Gen.DecoderNamespace
+ def _e32#Gen.Suffix
+ : VOPC_Real<ps32, Gen.Subtarget, asm_name>,
+ MnemonicAlias<!if(!empty(pseudo_mnemonic), !subst("_nosdst", "", ps32.Mnemonic),
+ pseudo_mnemonic),
+ asm_name, ps32.AsmVariantName>,
+ Requires<[Gen.AssemblerPredicate]>,
+ VOPCe<op{7-0}> {
+ let AsmString = asm_name # "{_e32} " # ps32.AsmOperands;
+ }
+ def _e64#Gen.Suffix
+ : VOP3_Real<ps64, Gen.Subtarget, asm_name>,
+ MnemonicAlias<!if(!empty(pseudo_mnemonic), !subst("_nosdst", "", ps64.Mnemonic),
+ pseudo_mnemonic),
+ asm_name, ps64.AsmVariantName>,
+ Requires<[Gen.AssemblerPredicate]>,
+ VOP3a_gfx11_gfx12<{0, op}, ps64.Pfl> {
+ let Inst{7-0} = ? ; // sdst
+ let AsmString = asm_name # "{_e64} " # ps64.AsmOperands;
+ }
defm : VOPCXInstAliases<OpName, !substr(Gen.Suffix, 1), NAME, asm_name>;
if ps32.Pfl.HasExtDPP then {
defvar psDPP = !cast<VOP_DPP_Pseudo>(OpName#"_nosdst_e32"#"_dpp");
- let DecoderNamespace = "DPP"#Gen.DecoderNamespace in {
- def _e32_dpp#Gen.Suffix : VOPC_DPP16_SIMC<op{7-0}, psDPP,
- Gen.Subtarget, asm_name>;
- }
- let DecoderNamespace = "DPP8"#Gen.DecoderNamespace in {
- def _e32_dpp8#Gen.Suffix : VOPC_DPP8<op{7-0}, ps32, asm_name>;
- }
+ def _e32_dpp#Gen.Suffix : VOPC_DPP16_SIMC<op{7-0}, psDPP,
+ Gen.Subtarget, asm_name>;
+ def _e32_dpp8#Gen.Suffix : VOPC_DPP8<op{7-0}, ps32, asm_name>;
}
if ps64.Pfl.HasExtVOP3DPP then {
defvar psDPP = !cast<VOP_DPP_Pseudo>(OpName#"_nosdst_e64"#"_dpp");
defvar AsmDPP = ps64.Pfl.AsmVOP3DPP16;
- let DecoderNamespace = "DPP"#Gen.DecoderNamespace in {
- def _e64_dpp#Gen.Suffix
- : VOPC64_DPP16_NoDst<{0, op}, psDPP, asm_name>,
- SIMCInstr<psDPP.PseudoInstr, Gen.Subtarget> {
- let AsmString = asm_name # "{_e64_dpp} " # AsmDPP;
- }
+ def _e64_dpp#Gen.Suffix
+ : VOPC64_DPP16_NoDst<{0, op}, psDPP, asm_name>,
+ SIMCInstr<psDPP.PseudoInstr, Gen.Subtarget> {
+ let AsmString = asm_name # "{_e64_dpp} " # AsmDPP;
}
defvar AsmDPP8 = ps64.Pfl.AsmVOP3DPP8;
- let DecoderNamespace = "DPP8"#Gen.DecoderNamespace in {
- def _e64_dpp8#Gen.Suffix : VOPC64_DPP8_NoDst<{0, op}, ps64, asm_name> {
- let AsmString = asm_name # "{_e64_dpp} " # AsmDPP8;
- }
+ def _e64_dpp8#Gen.Suffix : VOPC64_DPP8_NoDst<{0, op}, ps64, asm_name> {
+ let AsmString = asm_name # "{_e64_dpp} " # AsmDPP8;
}
}
- } // AssemblerPredicate = Gen.AssemblerPredicate
+ } // End AssemblerPredicate = Gen.AssemblerPredicate, DecoderNamespace = Gen.DecoderNamespace
}
multiclass VOPCX_Real_t16<GFXGen Gen, bits<9> op, string asm_name,
@@ -1873,21 +1835,19 @@ defm V_CMPX_CLASS_F64 : VOPCX_Real_gfx11_gfx12<0x0ff>;
// GFX10.
//===----------------------------------------------------------------------===//
-let AssemblerPredicate = isGFX10Only in {
+let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
multiclass VOPC_Real_gfx10<bits<9> op> {
- let DecoderNamespace = "GFX10" in {
- def _e32_gfx10 :
- VOPC_Real<!cast<VOPC_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX10>,
- VOPCe<op{7-0}>;
- def _e64_gfx10 :
- VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>,
- VOP3a_gfx10<{0, op}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
- // Encoding used for VOPC instructions encoded as VOP3 differs from
- // VOP3e by destination name (sdst) as VOPC doesn't have vector dst.
- bits<8> sdst;
- let Inst{7-0} = sdst;
- }
- } // End DecoderNamespace = "GFX10"
+ def _e32_gfx10 :
+ VOPC_Real<!cast<VOPC_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX10>,
+ VOPCe<op{7-0}>;
+ def _e64_gfx10 :
+ VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>,
+ VOP3a_gfx10<{0, op}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
+ // Encoding used for VOPC instructions encoded as VOP3 differs from
+ // VOP3e by destination name (sdst) as VOPC doesn't have vector dst.
+ bits<8> sdst;
+ let Inst{7-0} = sdst;
+ }
if !cast<VOPC_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then
def _sdwa_gfx10 :
@@ -1898,22 +1858,20 @@ let AssemblerPredicate = isGFX10Only in {
}
multiclass VOPCX_Real_gfx10<bits<9> op> {
- let DecoderNamespace = "GFX10" in {
- def _e32_gfx10 :
- VOPC_Real<!cast<VOPC_Pseudo>(NAME#"_nosdst_e32"), SIEncodingFamily.GFX10>,
- VOPCe<op{7-0}> {
- let AsmString = !subst("_nosdst", "", !cast<VOPC_Pseudo>(NAME#"_nosdst_e32").PseudoInstr)
- # " " # !cast<VOPC_Pseudo>(NAME#"_nosdst_e32").AsmOperands;
- }
-
- def _e64_gfx10 :
- VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_nosdst_e64"), SIEncodingFamily.GFX10>,
- VOP3a_gfx10<{0, op}, !cast<VOP3_Pseudo>(NAME#"_nosdst_e64").Pfl> {
- let Inst{7-0} = ?; // sdst
- let AsmString = !subst("_nosdst", "", !cast<VOP3_Pseudo>(NAME#"_nosdst_e64").Mnemonic)
- # "{_e64} " # !cast<VOP3_Pseudo>(NAME#"_nosdst_e64").AsmOperands;
- }
- } // End DecoderNamespace = "GFX10"
+ def _e32_gfx10 :
+ VOPC_Real<!cast<VOPC_Pseudo>(NAME#"_nosdst_e32"), SIEncodingFamily.GFX10>,
+ VOPCe<op{7-0}> {
+ let AsmString = !subst("_nosdst", "", !cast<VOPC_Pseudo>(NAME#"_nosdst_e32").PseudoInstr)
+ # " " # !cast<VOPC_Pseudo>(NAME#"_nosdst_e32").AsmOperands;
+ }
+
+ def _e64_gfx10 :
+ VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_nosdst_e64"), SIEncodingFamily.GFX10>,
+ VOP3a_gfx10<{0, op}, !cast<VOP3_Pseudo>(NAME#"_nosdst_e64").Pfl> {
+ let Inst{7-0} = ?; // sdst
+ let AsmString = !subst("_nosdst", "", !cast<VOP3_Pseudo>(NAME#"_nosdst_e64").Mnemonic)
+ # "{_e64} " # !cast<VOP3_Pseudo>(NAME#"_nosdst_e64").AsmOperands;
+ }
if !cast<VOPC_Pseudo>(NAME#"_nosdst_e32").Pfl.HasExtSDWA9 then
def _sdwa_gfx10 :
@@ -1925,7 +1883,7 @@ let AssemblerPredicate = isGFX10Only in {
defm : VOPCXInstAliases<NAME, "gfx10">;
}
-} // End AssemblerPredicate = isGFX10Only
+} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10"
defm V_CMP_LT_I16 : VOPC_Real_gfx10<0x089>;
defm V_CMP_EQ_I16 : VOPC_Real_gfx10<0x08a>;
@@ -1990,25 +1948,23 @@ defm V_CMPX_TRU_F16 : VOPCX_Real_gfx10<0x0ff>;
// GFX6, GFX7, GFX10.
//===----------------------------------------------------------------------===//
-let AssemblerPredicate = isGFX6GFX7 in {
+let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
multiclass VOPC_Real_gfx6_gfx7<bits<9> op> {
- let DecoderNamespace = "GFX6GFX7" in {
- def _e32_gfx6_gfx7 :
- VOPC_Real<!cast<VOPC_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>,
- VOPCe<op{7-0}>;
- def _e64_gfx6_gfx7 :
- VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
- VOP3a_gfx6_gfx7<op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
- // Encoding used for VOPC instructions encoded as VOP3 differs from
- // VOP3e by destination name (sdst) as VOPC doesn't have vector dst.
- bits<8> sdst;
- let Inst{7-0} = sdst;
- }
- } // End DecoderNamespace = "GFX6GFX7"
+ def _e32_gfx6_gfx7 :
+ VOPC_Real<!cast<VOPC_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>,
+ VOPCe<op{7-0}>;
+ def _e64_gfx6_gfx7 :
+ VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
+ VOP3a_gfx6_gfx7<op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
+ // Encoding used for VOPC instructions encoded as VOP3 differs from
+ // VOP3e by destination name (sdst) as VOPC doesn't have vector dst.
+ bits<8> sdst;
+ let Inst{7-0} = sdst;
+ }
defm : VOPCInstAliases<NAME, "gfx6_gfx7">;
}
-} // End AssemblerPredicate = isGFX6GFX7
+} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"
multiclass VOPC_Real_gfx6_gfx7_gfx10<bits<9> op> :
VOPC_Real_gfx6_gfx7<op>, VOPC_Real_gfx10<op>;
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index 801afab..80d7d96 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -818,6 +818,7 @@ class VOP_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[],
let VALU = 1;
let DPP = 1;
let Size = 8;
+ let IsPacked = P.IsPacked;
let ReadsModeReg = !or(P.DstVT.isFP, P.Src0VT.isFP);
@@ -835,7 +836,7 @@ class VOP_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[],
AMDGPUAsmVariants.Disable);
let Constraints = !if(P.NumSrcArgs, P.TieRegDPP # " = $vdst", "");
let DisableEncoding = !if(P.NumSrcArgs, P.TieRegDPP, "");
- let DecoderNamespace = "DPP";
+ let DecoderNamespace = "GFX8";
VOPProfile Pfl = P;
}
@@ -906,7 +907,7 @@ class VOP_DPP_Base <string OpName, VOPProfile P,
AMDGPUAsmVariants.Disable);
let Constraints = !if(P.NumSrcArgs, P.TieRegDPP # " = $vdst", "");
let DisableEncoding = !if(P.NumSrcArgs, P.TieRegDPP, "");
- let DecoderNamespace = "DPP";
+ let DecoderNamespace = "GFX8";
}
class VOP_DPP <string OpName, VOPProfile P, bit IsDPP16,
@@ -1350,7 +1351,7 @@ class VOP3_DPP16_Gen<bits<10> op, VOP_DPP_Pseudo ps, GFXGen Gen,
VOP3_DPP16 <op, ps, Gen.Subtarget, opName> {
let AssemblerPredicate = Gen.AssemblerPredicate;
let True16Predicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, NoTrue16Predicate);
- let DecoderNamespace = "DPP"#Gen.DecoderNamespace#
+ let DecoderNamespace = Gen.DecoderNamespace#
!if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
}
@@ -1463,7 +1464,7 @@ multiclass VOP3_Real_dpp_with_name<GFXGen Gen, bits<10> op, string opName,
multiclass VOP3_Real_dpp8_Base<GFXGen Gen, bits<10> op, string opName = NAME> {
defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");
def _e64_dpp8#Gen.Suffix : Base_VOP3_DPP8<op, ps> {
- let DecoderNamespace = "DPP8"#Gen.DecoderNamespace;
+ let DecoderNamespace = Gen.DecoderNamespace;
let AssemblerPredicate = Gen.AssemblerPredicate;
}
}
@@ -1473,7 +1474,7 @@ multiclass VOP3Dot_Real_dpp8_Base<GFXGen Gen, bits<10> op, string opName = NAME>
def _e64_dpp8#Gen.Suffix : Base_VOP3_DPP8<op, ps> {
let Inst{11} = ?;
let Inst{12} = ?;
- let DecoderNamespace = "DPP8"#Gen.DecoderNamespace;
+ let DecoderNamespace = Gen.DecoderNamespace;
let AssemblerPredicate = Gen.AssemblerPredicate;
}
}
@@ -1482,7 +1483,7 @@ multiclass VOP3_Real_dpp8_with_name<GFXGen Gen, bits<10> op, string opName,
string asmName> {
defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");
let AsmString = asmName # ps.Pfl.AsmVOP3DPP8,
- DecoderNamespace = "DPP8"#Gen.DecoderNamespace#
+ DecoderNamespace = Gen.DecoderNamespace#
!if(ps.Pfl.IsRealTrue16, "", "_FAKE16"),
True16Predicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts,
NoTrue16Predicate) in {
@@ -1505,7 +1506,7 @@ multiclass VOP3be_Real_dpp<GFXGen Gen, bits<10> op, string opName,
defvar dpp_ps = !cast<VOP_DPP_Pseudo>(opName #"_e64" #"_dpp");
def _e64_dpp#Gen.Suffix : Base_VOP3b_DPP16<op, dpp_ps, asmName>,
SIMCInstr<dpp_ps.PseudoInstr, Gen.Subtarget> {
- let DecoderNamespace = "DPP"#Gen.DecoderNamespace;
+ let DecoderNamespace = Gen.DecoderNamespace;
let AssemblerPredicate = Gen.AssemblerPredicate;
}
}
@@ -1514,7 +1515,7 @@ multiclass VOP3be_Real_dpp8<GFXGen Gen, bits<10> op, string opName,
string asmName> {
defvar ps = !cast<VOP3_Pseudo>(opName #"_e64");
def _e64_dpp8#Gen.Suffix : VOP3b_DPP8_Base<op, ps, asmName> {
- let DecoderNamespace = "DPP8"#Gen.DecoderNamespace;
+ let DecoderNamespace = Gen.DecoderNamespace;
let AssemblerPredicate = Gen.AssemblerPredicate;
}
}