Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPU.td                           | 21
-rw-r--r--  llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp       | 63
-rw-r--r--  llvm/lib/Target/AMDGPU/GCNSubtarget.cpp                    |  2
-rw-r--r--  llvm/lib/Target/AMDGPU/GCNSubtarget.h                      |  4
-rw-r--r--  llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp  | 11
-rw-r--r--  llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp                  |  7
-rw-r--r--  llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp            | 10
-rw-r--r--  llvm/lib/Target/AMDGPU/VOP3Instructions.td                 | 31
-rw-r--r--  llvm/lib/Target/AMDGPU/VOPInstructions.td                  |  7
9 files changed, 93 insertions, 63 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 9446144..1a697f7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1411,20 +1411,6 @@ def FeatureGloballyAddressableScratch : SubtargetFeature<
"FLAT instructions can access scratch memory for any thread in any wave"
>;
-// FIXME: Remove after all users are migrated to attribute.
-def FeatureDynamicVGPR : SubtargetFeature <"dynamic-vgpr",
- "DynamicVGPR",
- "true",
- "Enable dynamic VGPR mode"
->;
-
-// FIXME: Remove after all users are migrated to attribute.
-def FeatureDynamicVGPRBlockSize32 : SubtargetFeature<"dynamic-vgpr-block-size-32",
- "DynamicVGPRBlockSize32",
- "true",
- "Use a block size of 32 for dynamic VGPR allocation (default is 16)"
->;
-
// Enable the use of SCRATCH_STORE/LOAD_BLOCK instructions for saving and
// restoring the callee-saved registers.
def FeatureUseBlockVGPROpsForCSR : SubtargetFeature<"block-vgpr-csr",
@@ -1462,6 +1448,12 @@ def Feature45BitNumRecordsBufferResource : SubtargetFeature< "45-bit-num-records
"The buffer resource (V#) supports 45-bit num_records"
>;
+def FeatureClusters : SubtargetFeature< "clusters",
+ "HasClusters",
+ "true",
+ "Has clusters of workgroups support"
+>;
+
// Dummy feature used to disable assembler instructions.
def FeatureDisable : SubtargetFeature<"",
"FeatureDisable","true",
@@ -2128,6 +2120,7 @@ def FeatureISAVersion12_50 : FeatureSet<
Feature45BitNumRecordsBufferResource,
FeatureSupportsXNACK,
FeatureXNACK,
+ FeatureClusters,
]>;
def FeatureISAVersion12_51 : FeatureSet<
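The removed dynamic-VGPR features have been superseded by the function attribute, while the new "clusters" feature follows the standard SubtargetFeature pattern: the feature string toggles the HasClusters member added to GCNSubtarget further down. As a hedged sketch (not part of the patch), MC-level code could test the generated feature bit the same way the existing code in AMDGPUBaseInfo.cpp tests FeatureWavefrontSize32:

// Illustrative only, assuming the usual AMDGPU MC includes; FeatureClusters
// is the TableGen-generated enum value for the "clusters" feature string.
static bool hasClusters(const llvm::MCSubtargetInfo *STI) {
  return STI->getFeatureBits().test(llvm::AMDGPU::FeatureClusters);
}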
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index a67a7be..d0c0822 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1944,6 +1944,7 @@ public:
void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
+ void cvtOpSelHelper(MCInst &Inst, unsigned OpSel);
bool parseDimId(unsigned &Encoding);
ParseStatus parseDim(OperandVector &Operands);
@@ -9239,6 +9240,33 @@ static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
MCOI::OperandConstraint::TIED_TO) == -1;
}
+void AMDGPUAsmParser::cvtOpSelHelper(MCInst &Inst, unsigned OpSel) {
+ unsigned Opc = Inst.getOpcode();
+ constexpr AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
+ AMDGPU::OpName::src2};
+ constexpr AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
+ AMDGPU::OpName::src1_modifiers,
+ AMDGPU::OpName::src2_modifiers};
+ for (int J = 0; J < 3; ++J) {
+ int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
+ if (OpIdx == -1)
+ // Some instructions, e.g. v_interp_p2_f16 in GFX9, have src0, src2, but
+ // no src1. So continue instead of break.
+ continue;
+
+ int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
+ uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
+
+ if ((OpSel & (1 << J)) != 0)
+ ModVal |= SISrcMods::OP_SEL_0;
+ // op_sel[3] is encoded in src0_modifiers.
+ if (ModOps[J] == AMDGPU::OpName::src0_modifiers && (OpSel & (1 << 3)) != 0)
+ ModVal |= SISrcMods::DST_OP_SEL;
+
+ Inst.getOperand(ModIdx).setImm(ModVal);
+ }
+}
+
void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
{
OptionalImmIndexMap OptionalIdx;
@@ -9275,6 +9303,16 @@ void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
addOptionalImmOperand(Inst, Operands, OptionalIdx,
AMDGPUOperand::ImmTyOModSI);
+
+ // Some v_interp instructions use op_sel[3] for dst.
+ if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
+ addOptionalImmOperand(Inst, Operands, OptionalIdx,
+ AMDGPUOperand::ImmTyOpSel);
+ int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
+ unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
+
+ cvtOpSelHelper(Inst, OpSel);
+ }
}
void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
@@ -9310,31 +9348,10 @@ void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
if (OpSelIdx == -1)
return;
- const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
- AMDGPU::OpName::src2};
- const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
- AMDGPU::OpName::src1_modifiers,
- AMDGPU::OpName::src2_modifiers};
-
unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
-
- for (int J = 0; J < 3; ++J) {
- int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
- if (OpIdx == -1)
- break;
-
- int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
- uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
-
- if ((OpSel & (1 << J)) != 0)
- ModVal |= SISrcMods::OP_SEL_0;
- if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
- (OpSel & (1 << 3)) != 0)
- ModVal |= SISrcMods::DST_OP_SEL;
-
- Inst.getOperand(ModIdx).setImm(ModVal);
- }
+ cvtOpSelHelper(Inst, OpSel);
}
+
void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst,
const OperandVector &Operands) {
OptionalImmIndexMap OptionalIdx;
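The helper factored out above folds the parsed op_sel immediate into the per-source modifier operands: op_sel bit J sets OP_SEL_0 in srcJ_modifiers, and op_sel bit 3 (the destination half) is carried as DST_OP_SEL in src0_modifiers, skipping a missing src1 rather than stopping at it. A standalone sketch of the same transformation on raw modifier words (illustrative; the constants are assumed to match SISrcMods):

#include <cstdint>

// Assumed to mirror SISrcMods::OP_SEL_0 and SISrcMods::DST_OP_SEL.
constexpr uint32_t OP_SEL_0 = 1u << 2;
constexpr uint32_t DST_OP_SEL = 1u << 3;

static void foldOpSel(uint32_t SrcMods[3], unsigned OpSel, bool HasSrc1) {
  for (int J = 0; J < 3; ++J) {
    if (J == 1 && !HasSrc1)
      continue; // e.g. v_interp_p2_f16 on GFX9 has src0 and src2 but no src1
    if (OpSel & (1u << J))
      SrcMods[J] |= OP_SEL_0;
  }
  if (OpSel & (1u << 3))
    SrcMods[0] |= DST_OP_SEL; // op_sel[3] (dst) is encoded in src0_modifiers
}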
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
index 7b94ea3..f291e37 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
@@ -541,7 +541,7 @@ unsigned GCNSubtarget::getMaxNumSGPRs(const Function &F) const {
unsigned GCNSubtarget::getBaseMaxNumVGPRs(
const Function &F, std::pair<unsigned, unsigned> NumVGPRBounds) const {
- const auto &[Min, Max] = NumVGPRBounds;
+ const auto [Min, Max] = NumVGPRBounds;
// Check if maximum number of VGPRs was explicitly requested using
// "amdgpu-num-vgpr" attribute.
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index a54d665..c2e6078 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -288,6 +288,8 @@ protected:
bool Has45BitNumRecordsBufferResource = false;
+ bool HasClusters = false;
+
// Dummy feature to use for assembler in tablegen.
bool FeatureDisable = false;
@@ -1837,7 +1839,7 @@ public:
}
/// \returns true if the subtarget supports clusters of workgroups.
- bool hasClusters() const { return GFX1250Insts; }
+ bool hasClusters() const { return HasClusters; }
/// \returns true if the subtarget requires a wait for xcnt before atomic
/// flat/global stores & rmw.
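Backing hasClusters() with the feature bit instead of a hardcoded GFX1250Insts check leaves callers untouched; only the source of truth moves into the feature sets in AMDGPU.td. A hypothetical caller, sketched for illustration:

// Hypothetical example (not from this patch): gate cluster-related lowering
// on the subtarget query rather than on a specific generation check.
static void lowerClusterOp(const GCNSubtarget &ST) {
  if (!ST.hasClusters())
    return; // target has no workgroup-cluster support
  // ... emit cluster-aware code ...
}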
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index d3b5718..3563caa 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -1280,6 +1280,17 @@ void AMDGPUInstPrinter::printPackedModifier(const MCInst *MI,
(ModIdx != -1) ? MI->getOperand(ModIdx).getImm() : DefaultValue;
}
+ // Some instructions, e.g. v_interp_p2_f16 in GFX9, have src0, src2, but no
+ // src1.
+ if (NumOps == 1 && AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src2) &&
+ !AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1)) {
+ Ops[NumOps++] = DefaultValue; // Set src1_modifiers to default.
+ int Mod2Idx =
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
+ assert(Mod2Idx != -1);
+ Ops[NumOps++] = MI->getOperand(Mod2Idx).getImm();
+ }
+
const bool HasDst =
(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst) != -1) ||
(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst) != -1);
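Padding the missing src1 slot keeps the Ops[] entries positionally aligned with the op_sel bits, so the dst bit can still be read from Ops[0] and the src2 bit from Ops[2]. A minimal sketch of the inverse mapping the printer relies on (illustrative names and constants, not the real printPackedModifier):

#include <cstdint>

// Recover op_sel from the padded per-source modifier words; the constants are
// assumed to match SISrcMods::OP_SEL_0 (1 << 2) and SISrcMods::DST_OP_SEL (1 << 3).
static unsigned opSelFromMods(const uint32_t Mods[3], bool HasDst) {
  unsigned OpSel = 0;
  for (int J = 0; J < 3; ++J)
    if (Mods[J] & (1u << 2))
      OpSel |= 1u << J;
  if (HasDst && (Mods[0] & (1u << 3)))
    OpSel |= 1u << 3;
  return OpSel;
}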
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 3c2dd42..3115579 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1118,12 +1118,7 @@ SIRegisterInfo::getPointerRegClass(unsigned Kind) const {
const TargetRegisterClass *
SIRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
- if (isAGPRClass(RC) && !ST.hasGFX90AInsts())
- return getEquivalentVGPRClass(RC);
- if (RC == &AMDGPU::SCC_CLASSRegClass)
- return getWaveMaskRegClass();
-
- return RC;
+ return RC == &AMDGPU::SCC_CLASSRegClass ? &AMDGPU::SReg_32RegClass : RC;
}
static unsigned getNumSubRegsForSpillOp(const MachineInstr &MI,
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 20fa141..f7f4d46 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1353,11 +1353,6 @@ unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
if (DynamicVGPRBlockSize != 0)
return DynamicVGPRBlockSize;
- // Temporarily check the subtarget feature, until we fully switch to using
- // attributes.
- if (STI->getFeatureBits().test(FeatureDynamicVGPR))
- return STI->getFeatureBits().test(FeatureDynamicVGPRBlockSize32) ? 32 : 16;
-
bool IsWave32 = EnableWavefrontSize32
? *EnableWavefrontSize32
: STI->getFeatureBits().test(FeatureWavefrontSize32);
@@ -1412,10 +1407,7 @@ unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI,
if (Features.test(FeatureGFX90AInsts))
return 512;
- // Temporarily check the subtarget feature, until we fully switch to using
- // attributes.
- if (DynamicVGPRBlockSize != 0 ||
- STI->getFeatureBits().test(FeatureDynamicVGPR))
+ if (DynamicVGPRBlockSize != 0)
// On GFX12 we can allocate at most 8 blocks of VGPRs.
return 8 * getVGPRAllocGranule(STI, DynamicVGPRBlockSize);
return getAddressableNumArchVGPRs(STI);
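With the temporary feature checks removed, the dynamic-VGPR path is driven entirely by the block size coming from the attribute. The GFX12 limit quoted in the comment works out as follows (a worked sketch; the function name is illustrative):

// At most 8 blocks of VGPRs per wave on GFX12; each block is the allocation
// granule, i.e. 16 VGPRs by default or 32 with the block-size-32 attribute.
static unsigned addressableDynVGPRs(unsigned DynamicVGPRBlockSize) {
  // Expected values here are 16 or 32.
  return 8 * DynamicVGPRBlockSize; // 8 * 16 = 128, or 8 * 32 = 256 VGPRs
}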
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 4a2b54d..42ec8ba 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -97,6 +97,7 @@ class VOP3Interp<string OpName, VOPProfile P, list<dag> pattern = []> :
VOP3_Pseudo<OpName, P, pattern> {
let AsmMatchConverter = "cvtVOP3Interp";
let mayRaiseFPException = 0;
+ let VOP3_OPSEL = P.HasOpSel;
}
def VOP3_INTERP : VOPProfile<[f32, f32, i32, untyped]> {
@@ -119,16 +120,17 @@ def VOP3_INTERP_MOV : VOPProfile<[f32, i32, i32, untyped]> {
let HasSrc0Mods = 0;
}
-class getInterp16Asm <bit HasSrc2, bit HasOMod> {
+class getInterp16Asm <bit HasSrc2, bit HasOMod, bit OpSel> {
string src2 = !if(HasSrc2, ", $src2_modifiers", "");
string omod = !if(HasOMod, "$omod", "");
+ string opsel = !if(OpSel, "$op_sel", "");
string ret =
- " $vdst, $src0_modifiers, $attr$attrchan"#src2#"$high$clamp"#omod;
+ " $vdst, $src0_modifiers, $attr$attrchan"#src2#"$high$clamp"#omod#opsel;
}
class getInterp16Ins <bit HasSrc2, bit HasOMod,
- Operand Src0Mod, Operand Src2Mod> {
- dag ret = !if(HasSrc2,
+ Operand Src0Mod, Operand Src2Mod, bit OpSel> {
+ dag ret1 = !if(HasSrc2,
!if(HasOMod,
(ins Src0Mod:$src0_modifiers, VRegSrc_32:$src0,
InterpAttr:$attr, InterpAttrChan:$attrchan,
@@ -143,19 +145,22 @@ class getInterp16Ins <bit HasSrc2, bit HasOMod,
InterpAttr:$attr, InterpAttrChan:$attrchan,
highmod:$high, Clamp0:$clamp, omod0:$omod)
);
+ dag ret2 = !if(OpSel, (ins op_sel0:$op_sel), (ins));
+ dag ret = !con(ret1, ret2);
}
-class VOP3_INTERP16 <list<ValueType> ArgVT> : VOPProfile<ArgVT> {
+class VOP3_INTERP16 <list<ValueType> ArgVT, bit OpSel = 0> : VOPProfile<ArgVT> {
let IsSingle = 1;
let HasOMod = !ne(DstVT.Value, f16.Value);
let HasHigh = 1;
+ let HasOpSel = OpSel;
let Src0Mod = FPVRegInputMods;
let Src2Mod = FPVRegInputMods;
let Outs64 = (outs DstRC.RegClass:$vdst);
- let Ins64 = getInterp16Ins<HasSrc2, HasOMod, Src0Mod, Src2Mod>.ret;
- let Asm64 = getInterp16Asm<HasSrc2, HasOMod>.ret;
+ let Ins64 = getInterp16Ins<HasSrc2, HasOMod, Src0Mod, Src2Mod, OpSel>.ret;
+ let Asm64 = getInterp16Asm<HasSrc2, HasOMod, OpSel>.ret;
}
//===----------------------------------------------------------------------===//
@@ -480,7 +485,7 @@ let SubtargetPredicate = isGFX9Plus in {
defm V_MAD_U16_gfx9 : VOP3Inst_t16 <"v_mad_u16_gfx9", VOP_I16_I16_I16_I16>;
defm V_MAD_I16_gfx9 : VOP3Inst_t16 <"v_mad_i16_gfx9", VOP_I16_I16_I16_I16>;
let OtherPredicates = [isNotGFX90APlus] in
-def V_INTERP_P2_F16_gfx9 : VOP3Interp <"v_interp_p2_f16_gfx9", VOP3_INTERP16<[f16, f32, i32, f32]>>;
+def V_INTERP_P2_F16_opsel : VOP3Interp <"v_interp_p2_f16_opsel", VOP3_INTERP16<[f16, f32, i32, f32], /*OpSel*/ 1>>;
} // End SubtargetPredicate = isGFX9Plus
// This predicate should only apply to the selection pattern. The
@@ -2676,6 +2681,14 @@ multiclass VOP3Interp_F16_Real_gfx9<bits<10> op, string OpName, string AsmName>
}
}
+multiclass VOP3Interp_F16_OpSel_Real_gfx9<bits<10> op, string OpName, string AsmName> {
+ def _gfx9 : VOP3_Real<!cast<VOP3_Pseudo>(OpName), SIEncodingFamily.GFX9>,
+ VOP3Interp_OpSel_gfx9 <op, !cast<VOP3_Pseudo>(OpName).Pfl> {
+ VOP3_Pseudo ps = !cast<VOP3_Pseudo>(OpName);
+ let AsmString = AsmName # ps.AsmOperands;
+ }
+}
+
multiclass VOP3_Real_gfx9<bits<10> op, string AsmName> {
def _gfx9 : VOP3_Real<!cast<VOP_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX9>,
VOP3e_vi <op, !cast<VOP_Pseudo>(NAME#"_e64").Pfl> {
@@ -2788,7 +2801,7 @@ defm V_MAD_U16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x204, "v_mad_u16">;
defm V_MAD_I16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x205, "v_mad_i16">;
defm V_FMA_F16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x206, "v_fma_f16">;
defm V_DIV_FIXUP_F16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x207, "v_div_fixup_f16">;
-defm V_INTERP_P2_F16_gfx9 : VOP3Interp_F16_Real_gfx9 <0x277, "V_INTERP_P2_F16_gfx9", "v_interp_p2_f16">;
+defm V_INTERP_P2_F16_opsel : VOP3Interp_F16_OpSel_Real_gfx9 <0x277, "V_INTERP_P2_F16_opsel", "v_interp_p2_f16">;
defm V_ADD_I32 : VOP3_Real_vi <0x29c>;
defm V_SUB_I32 : VOP3_Real_vi <0x29d>;
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index 631f0f3..8325c62 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -419,6 +419,13 @@ class VOP3a_ScaleSel_gfx1250<bits<10> op, VOPProfile p> : VOP3e_gfx11_gfx12<op,
let Inst{14-11} = scale_sel;
}
+class VOP3Interp_OpSel_gfx9<bits<10> op, VOPProfile p> : VOP3Interp_vi<op, p> {
+ let Inst{11} = src0_modifiers{2};
+ // There's no src1
+ let Inst{13} = src2_modifiers{2};
+ let Inst{14} = !if(p.HasDst, src0_modifiers{3}, 0);
+}
+
class VOP3Interp_gfx10<bits<10> op, VOPProfile p> : VOP3e_gfx10<op, p> {
bits<6> attr;
bits<2> attrchan;
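Tying the encoding class back to the parser helper: for the GFX9 v_interp_p2_f16 opsel variant, op_sel bit 0 (src0) lands in Inst{11}, bit 2 (src2) in Inst{13}, and bit 3 (dst) in Inst{14}; bit 12 stays clear because the instruction has no src1. A small sketch of that bit placement (illustrative, not generated encoder code):

#include <cstdint>

// Place the op_sel bits for v_interp_p2_f16 on GFX9 as VOP3Interp_OpSel_gfx9
// does, assuming the modifiers already carry the op_sel bits (see cvtOpSelHelper).
static uint64_t placeInterpOpSel(uint64_t Inst, unsigned OpSel, bool HasDst) {
  if (OpSel & (1u << 0))
    Inst |= 1ull << 11; // src0 half select
  // no src1 on this instruction, so bit 12 is left untouched
  if (OpSel & (1u << 2))
    Inst |= 1ull << 13; // src2 half select
  if (HasDst && (OpSel & (1u << 3)))
    Inst |= 1ull << 14; // dst half select
  return Inst;
}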