diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 31 |
1 files changed, 25 insertions, 6 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 75ce67c..9a1448f 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -29,6 +29,16 @@ enum { MAX_LANES = 64 }; using namespace llvm; +// TODO -- delete this flag once we have more robust mechanisms to allocate the +// optimal RC for Opc and Dest of MFMA. In particular, there are high RP cases +// where it is better to produce the VGPR form (e.g. if there are VGPR users +// of the MFMA result). +cl::opt<bool> MFMAVGPRForm( + "amdgpu-mfma-vgpr-form", cl::Hidden, + cl::desc("Whether to force use VGPR for Opc and Dest of MFMA. If " + "unspecified, default to compiler heuristics"), + cl::init(false)); + const GCNTargetMachine &getTM(const GCNSubtarget *STI) { const SITargetLowering *TLI = STI->getTargetLowering(); return static_cast<const GCNTargetMachine &>(TLI->getTargetMachine()); } @@ -41,7 +51,9 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F, WorkGroupIDZ(false), WorkGroupInfo(false), LDSKernelId(false), PrivateSegmentWaveByteOffset(false), WorkItemIDX(false), WorkItemIDY(false), WorkItemIDZ(false), ImplicitArgPtr(false), - GITPtrHigh(0xffffffff), HighBitsOf32BitAddress(0) { + GITPtrHigh(0xffffffff), HighBitsOf32BitAddress(0), + IsWholeWaveFunction(F.getCallingConv() == + CallingConv::AMDGPU_Gfx_WholeWave) { const GCNSubtarget &ST = *STI; FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F); WavesPerEU = ST.getWavesPerEU(F); @@ -70,10 +82,14 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F, } MayNeedAGPRs = ST.hasMAIInsts(); - if (ST.hasGFX90AInsts() && - ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() && - !mayUseAGPRs(F)) - MayNeedAGPRs = false; // We will select all MAI with VGPR operands. + if (ST.hasGFX90AInsts()) { + // FIXME: MayNeedAGPRs is a misnomer for how this is used. MFMA selection + // should be separated from availability of AGPRs + if (MFMAVGPRForm || + (ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() && + !mayUseAGPRs(F))) + MayNeedAGPRs = false; // We will select all MAI with VGPR operands. + } if (AMDGPU::isChainCC(CC)) { // Chain functions don't receive an SP from their caller, but are free to @@ -89,7 +105,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F, ImplicitArgPtr = false; } else if (!isEntryFunction()) { - if (CC != CallingConv::AMDGPU_Gfx) + if (CC != CallingConv::AMDGPU_Gfx && + CC != CallingConv::AMDGPU_Gfx_WholeWave) ArgInfo = AMDGPUArgumentUsageInfo::FixedABIFunctionInfo; FrameOffsetReg = AMDGPU::SGPR33; @@ -722,6 +739,7 @@ yaml::SIMachineFunctionInfo::SIMachineFunctionInfo( PSInputAddr(MFI.getPSInputAddr()), PSInputEnable(MFI.getPSInputEnable()), MaxMemoryClusterDWords(MFI.getMaxMemoryClusterDWords()), Mode(MFI.getMode()), HasInitWholeWave(MFI.hasInitWholeWave()), + IsWholeWaveFunction(MFI.isWholeWaveFunction()), DynamicVGPRBlockSize(MFI.getDynamicVGPRBlockSize()), ScratchReservedForDynamicVGPRs(MFI.getScratchReservedForDynamicVGPRs()) { for (Register Reg : MFI.getSGPRSpillPhysVGPRs()) @@ -768,6 +786,7 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields( HasSpilledVGPRs = YamlMFI.HasSpilledVGPRs; BytesInStackArgArea = YamlMFI.BytesInStackArgArea; ReturnsVoid = YamlMFI.ReturnsVoid; + IsWholeWaveFunction = YamlMFI.IsWholeWaveFunction; if (YamlMFI.ScavengeFI) { auto FIOrErr = YamlMFI.ScavengeFI->getFI(MF.getFrameInfo()); |