aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib
diff options
context:
space:
mode:
authorJeffrey Byrnes <jeffrey.byrnes@amd.com>2025-07-18 13:53:17 -0700
committerGitHub <noreply@github.com>2025-07-18 13:53:17 -0700
commit695660cdfd1ca65cd6e02e6950d10c990dfa0036 (patch)
tree8b10c98e73425f8a641e98094540d991a0476115 /llvm/lib
parentabdd4536ce0fc75c7a4ddcc1da5913ec5e028091 (diff)
downloadllvm-695660cdfd1ca65cd6e02e6950d10c990dfa0036.zip
llvm-695660cdfd1ca65cd6e02e6950d10c990dfa0036.tar.gz
llvm-695660cdfd1ca65cd6e02e6950d10c990dfa0036.tar.bz2
[AMDGPU] Provide control to force VGPR MFMA form (#148079)
This gives users an override to force selection of the VGPR form of MFMA. Eventually we will drop this in favor of the compiler making better decisions, but for now it provides a mechanism for users to address cases where MayNeedAGPRs favors the AGPR form and performance is degraded due to poor register allocation (RA).
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp14
1 files changed, 12 insertions, 2 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 75ce67c..8c2e9b62 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -29,6 +29,16 @@ enum { MAX_LANES = 64 };
using namespace llvm;
+// TODO: Delete this flag once the compiler can robustly pick the optimal
+// register class (RC) for the opcode and destination of an MFMA. Under high
+// register pressure (RP) the VGPR form can be the better choice, e.g. when
+// the MFMA result has VGPR users.
+cl::opt<bool> MFMAVGPRForm(
+ "amdgpu-mfma-vgpr-form", cl::Hidden,
+ cl::desc("Whether to force use VGPR for Opc and Dest of MFMA. If "
+ "unspecified, default to compiler heuristics"),
+ cl::init(false));
+
const GCNTargetMachine &getTM(const GCNSubtarget *STI) {
const SITargetLowering *TLI = STI->getTargetLowering();
return static_cast<const GCNTargetMachine &>(TLI->getTargetMachine());
@@ -69,8 +79,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
}
- MayNeedAGPRs = ST.hasMAIInsts();
- if (ST.hasGFX90AInsts() &&
+ MayNeedAGPRs = ST.hasMAIInsts() && !MFMAVGPRForm;
+ if (!MFMAVGPRForm && ST.hasGFX90AInsts() &&
ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
!mayUseAGPRs(F))
MayNeedAGPRs = false; // We will select all MAI with VGPR operands.