diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 14 |
1 file changed, 12 insertions, 2 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 75ce67c..8c2e9b62 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -29,6 +29,16 @@ enum { MAX_LANES = 64 };
 
 using namespace llvm;
 
+// TODO -- delete this flag once we have more robust mechanisms to allocate the
+// optimal RC for Opc and Dest of MFMA. In particular, there are high RP cases
+// where it is better to produce the VGPR form (e.g. if there are VGPR users
+// of the MFMA result).
+cl::opt<bool> MFMAVGPRForm(
+    "amdgpu-mfma-vgpr-form", cl::Hidden,
+    cl::desc("Whether to force use VGPR for Opc and Dest of MFMA. If "
+             "unspecified, default to compiler heuristics"),
+    cl::init(false));
+
 const GCNTargetMachine &getTM(const GCNSubtarget *STI) {
   const SITargetLowering *TLI = STI->getTargetLowering();
   return static_cast<const GCNTargetMachine &>(TLI->getTargetMachine());
@@ -69,8 +79,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
     PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
   }
 
-  MayNeedAGPRs = ST.hasMAIInsts();
-  if (ST.hasGFX90AInsts() &&
+  MayNeedAGPRs = ST.hasMAIInsts() && !MFMAVGPRForm;
+  if (!MFMAVGPRForm && ST.hasGFX90AInsts() &&
       ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
       !mayUseAGPRs(F))
     MayNeedAGPRs = false; // We will select all MAI with VGPR operands.