diff options
author | Jeffrey Byrnes <jeffrey.byrnes@amd.com> | 2025-07-18 13:53:17 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-07-18 13:53:17 -0700 |
commit | 695660cdfd1ca65cd6e02e6950d10c990dfa0036 (patch) | |
tree | 8b10c98e73425f8a641e98094540d991a0476115 /llvm/lib | |
parent | abdd4536ce0fc75c7a4ddcc1da5913ec5e028091 (diff) | |
download | llvm-695660cdfd1ca65cd6e02e6950d10c990dfa0036.zip llvm-695660cdfd1ca65cd6e02e6950d10c990dfa0036.tar.gz llvm-695660cdfd1ca65cd6e02e6950d10c990dfa0036.tar.bz2 |
[AMDGPU] Provide control to force VGPR MFMA form (#148079)
This gives the user an override to force selection of the VGPR form of MFMA.
Eventually we will drop this in favor of the compiler making better
decisions, but this provides a mechanism for users to address the cases
where MayNeedAGPRs favors the AGPR form and performance is degraded due
to poor RA (register allocation).
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 14 |
1 files changed, 12 insertions, 2 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 75ce67c..8c2e9b62 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -29,6 +29,16 @@ enum { MAX_LANES = 64 }; using namespace llvm; +// TODO -- delete this flag once we have more robust mechanisms to allocate the +// optimal RC for Opc and Dest of MFMA. In particular, there are high RP cases +// where it is better to produce the VGPR form (e.g. if there are VGPR users +// of the MFMA result). +cl::opt<bool> MFMAVGPRForm( + "amdgpu-mfma-vgpr-form", cl::Hidden, + cl::desc("Whether to force use VGPR for Opc and Dest of MFMA. If " + "unspecified, default to compiler heuristics"), + cl::init(false)); + const GCNTargetMachine &getTM(const GCNSubtarget *STI) { const SITargetLowering *TLI = STI->getTargetLowering(); return static_cast<const GCNTargetMachine &>(TLI->getTargetMachine()); @@ -69,8 +79,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F, PSInputAddr = AMDGPU::getInitialPSInputAddr(F); } - MayNeedAGPRs = ST.hasMAIInsts(); - if (ST.hasGFX90AInsts() && + MayNeedAGPRs = ST.hasMAIInsts() && !MFMAVGPRForm; + if (!MFMAVGPRForm && ST.hasGFX90AInsts() && ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() && !mayUseAGPRs(F)) MayNeedAGPRs = false; // We will select all MAI with VGPR operands. |