diff options
author | Jon Chesterfield <jonathanchesterfield@gmail.com> | 2023-03-12 13:47:40 +0000 |
---|---|---|
committer | Jon Chesterfield <jonathanchesterfield@gmail.com> | 2023-03-12 13:47:48 +0000 |
commit | d3dda422bfd1dc281df944b4a07bcd6816e2ee94 (patch) | |
tree | 48719850f51d2b45e202b37796bdd256da4b1316 /llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp | |
parent | c4918cbf3ba3f6f0caefae3f283b5c05c3baf7ec (diff) | |
download | llvm-d3dda422bfd1dc281df944b4a07bcd6816e2ee94.zip llvm-d3dda422bfd1dc281df944b4a07bcd6816e2ee94.tar.gz llvm-d3dda422bfd1dc281df944b4a07bcd6816e2ee94.tar.bz2 |
[amdgpu][nfc] Replace ad hoc LDS frame recalculation with absolute_symbol MD
Post ISel, LDS variables are absolute values. Representing them as
such is simpler than the frame recalculation currently used to build assembler
tables from their addresses.
This is a precursor to lowering dynamic/external LDS accesses from non-kernel
functions.
Reviewed By: arsenm
Differential Revision: https://reviews.llvm.org/D144221
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp | 101 |
1 files changed, 37 insertions, 64 deletions
```diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
index a6a32b9..e70afd7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
@@ -11,7 +11,9 @@
 #include "AMDGPUPerfHintAnalysis.h"
 #include "AMDGPUSubtarget.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/IR/Metadata.h"
 #include "llvm/Target/TargetMachine.h"
 
 using namespace llvm;
@@ -89,24 +91,7 @@ unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
 
 static constexpr StringLiteral ModuleLDSName = "llvm.amdgcn.module.lds";
 
-bool AMDGPUMachineFunction::isKnownAddressLDSGlobal(const GlobalVariable &GV) {
-  auto name = GV.getName();
-  return (name == ModuleLDSName) ||
-         (name.startswith("llvm.amdgcn.kernel.") && name.endswith(".lds"));
-}
-
-const Function *AMDGPUMachineFunction::getKernelLDSFunctionFromGlobal(
-    const GlobalVariable &GV) {
-  const Module &M = *GV.getParent();
-  StringRef N(GV.getName());
-  if (N.consume_front("llvm.amdgcn.kernel.") && N.consume_back(".lds")) {
-    return M.getFunction(N);
-  }
-  return nullptr;
-}
-
-const GlobalVariable *
-AMDGPUMachineFunction::getKernelLDSGlobalFromFunction(const Function &F) {
+static const GlobalVariable *getKernelLDSGlobalFromFunction(const Function &F) {
   const Module *M = F.getParent();
   std::string KernelLDSName = "llvm.amdgcn.kernel.";
   KernelLDSName += F.getName();
@@ -119,40 +104,8 @@ static bool canElideModuleLDS(const Function &F) {
   return F.hasFnAttribute("amdgpu-elide-module-lds");
 }
 
-unsigned AMDGPUMachineFunction::calculateKnownAddressOfLDSGlobal(
-    const GlobalVariable &GV) {
-  // module.lds, then alignment padding, then kernel.lds, then other variables
-  // if any
-
-  assert(isKnownAddressLDSGlobal(GV));
-  unsigned Offset = 0;
-
-  if (GV.getName() == ModuleLDSName) {
-    return 0;
-  }
-
-  const Module *M = GV.getParent();
-  const DataLayout &DL = M->getDataLayout();
-
-  const GlobalVariable *GVM = M->getNamedGlobal(ModuleLDSName);
-  const Function *f = getKernelLDSFunctionFromGlobal(GV);
-
-  // Account for module.lds if allocated for this function
-  if (GVM && f && !canElideModuleLDS(*f)) {
-    // allocator aligns this to var align, but it's zero to begin with
-    Offset += DL.getTypeAllocSize(GVM->getValueType());
-  }
-
-  // No dynamic LDS alignment done by allocateModuleLDSGlobal
-  Offset = alignTo(
-      Offset, DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType()));
-
-  return Offset;
-}
-
 void AMDGPUMachineFunction::allocateKnownAddressLDSGlobal(const Function &F) {
   const Module *M = F.getParent();
-
   // This function is called before allocating any other LDS so that it can
   // reliably put values at known addresses. Consequently, dynamic LDS, if
   // present, will not yet have been allocated
@@ -180,40 +133,60 @@ void AMDGPUMachineFunction::allocateKnownAddressLDSGlobal(const Function &F) {
     const GlobalVariable *KV = getKernelLDSGlobalFromFunction(F);
 
     if (GV && !canElideModuleLDS(F)) {
-      assert(isKnownAddressLDSGlobal(*GV));
       unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *GV, Align());
-      (void)Offset;
-      assert(Offset == calculateKnownAddressOfLDSGlobal(*GV) &&
-             "Module LDS expected to be allocated before other LDS");
+      std::optional<uint32_t> Expect = getLDSAbsoluteAddress(*GV);
+      if (!Expect || (Offset != Expect)) {
+        report_fatal_error("Inconsistent metadata on module LDS variable");
+      }
     }
 
     if (KV) {
       // The per-kernel offset is deterministic because it is allocated
       // before any other non-module LDS variables.
-      assert(isKnownAddressLDSGlobal(*KV));
       unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *KV, Align());
-      (void)Offset;
-      assert(Offset == calculateKnownAddressOfLDSGlobal(*KV) &&
-             "Kernel LDS expected to be immediately after module LDS");
+      std::optional<uint32_t> Expect = getLDSAbsoluteAddress(*KV);
+      if (!Expect || (Offset != Expect)) {
+        report_fatal_error("Inconsistent metadata on kernel LDS variable");
+      }
     }
   }
 }
 
 std::optional<uint32_t>
 AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) {
-  auto MD = F.getMetadata("llvm.amdgcn.lds.kernel.id");
+  // TODO: Would be more consistent with the abs symbols to use a range
+  MDNode *MD = F.getMetadata("llvm.amdgcn.lds.kernel.id");
   if (MD && MD->getNumOperands() == 1) {
-    ConstantInt *KnownSize = mdconst::extract<ConstantInt>(MD->getOperand(0));
-    if (KnownSize) {
-      uint64_t V = KnownSize->getZExtValue();
-      if (V <= UINT32_MAX) {
-        return V;
+    if (ConstantInt *KnownSize =
+            mdconst::extract<ConstantInt>(MD->getOperand(0))) {
+      uint64_t ZExt = KnownSize->getZExtValue();
+      if (ZExt <= UINT32_MAX) {
+        return ZExt;
       }
     }
   }
   return {};
 }
 
+std::optional<uint32_t>
+AMDGPUMachineFunction::getLDSAbsoluteAddress(const GlobalValue &GV) {
+  if (GV.getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
+    return {};
+
+  std::optional<ConstantRange> AbsSymRange = GV.getAbsoluteSymbolRange();
+  if (!AbsSymRange)
+    return {};
+
+  if (const APInt *V = AbsSymRange->getSingleElement()) {
+    std::optional<uint64_t> ZExt = V->tryZExtValue();
+    if (ZExt && (*ZExt <= UINT32_MAX)) {
+      return *ZExt;
+    }
+  }
+
+  return {};
+}
+
 void AMDGPUMachineFunction::setDynLDSAlign(const DataLayout &DL,
                                            const GlobalVariable &GV) {
   assert(DL.getTypeAllocSize(GV.getValueType()).isZero());
```