diff options
author | Janek van Oirschot <janek.vanoirschot@amd.com> | 2024-06-13 13:59:31 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-06-13 13:59:31 +0100 |
commit | 3d1705d00c5f183b73d69aa0ceca481845e82fe8 (patch) | |
tree | d3a5e04f801b454cda7cd669686e32f1e5ee3283 /llvm | |
parent | ffab938f50fa999f2218976f7de78cf8e4f70d4e (diff) | |
download | llvm-3d1705d00c5f183b73d69aa0ceca481845e82fe8.zip llvm-3d1705d00c5f183b73d69aa0ceca481845e82fe8.tar.gz llvm-3d1705d00c5f183b73d69aa0ceca481845e82fe8.tar.bz2 |
MCExpr-ify AMDGPU PALMetadata (#93236)
Allows MCExprs as passed values to PALMetadata. Also adds related
`DelayedMCExpr` classes which serve as a pseudo-fixup to resolve MCExprs
as late as possible (i.e., right before emit through string or blob,
where they should be resolvable).
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 48 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUDelayedMCExpr.cpp | 61 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUDelayedMCExpr.h | 39 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp | 113 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h | 24 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp | 46 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt | 1 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/SIDefinesUtils.h | 79 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/amdpal-es.ll | 1 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/amdpal-gs.ll | 1 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/amdpal-hs.ll | 1 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/amdpal-ls.ll | 1 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/amdpal-vs.ll | 1 | ||||
-rw-r--r-- | llvm/unittests/Target/AMDGPU/CMakeLists.txt | 1 | ||||
-rw-r--r-- | llvm/unittests/Target/AMDGPU/PALMetadata.cpp | 245 |
15 files changed, 601 insertions, 61 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 609e975..d8e22f4b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -29,6 +29,7 @@ #include "TargetInfo/AMDGPUTargetInfo.h" #include "Utils/AMDGPUBaseInfo.h" #include "Utils/AMDKernelCodeTUtils.h" +#include "Utils/SIDefinesUtils.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -1234,41 +1235,49 @@ void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF, auto &Ctx = MF.getContext(); MD->setEntryPoint(CC, MF.getFunction().getName()); - MD->setNumUsedVgprs( - CC, getMCExprValue(CurrentProgramInfo.NumVGPRsForWavesPerEU, Ctx)); + MD->setNumUsedVgprs(CC, CurrentProgramInfo.NumVGPRsForWavesPerEU, Ctx); // Only set AGPRs for supported devices const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); if (STM.hasMAIInsts()) { - MD->setNumUsedAgprs(CC, getMCExprValue(CurrentProgramInfo.NumAccVGPR, Ctx)); + MD->setNumUsedAgprs(CC, CurrentProgramInfo.NumAccVGPR); } - MD->setNumUsedSgprs( - CC, getMCExprValue(CurrentProgramInfo.NumSGPRsForWavesPerEU, Ctx)); + MD->setNumUsedSgprs(CC, CurrentProgramInfo.NumSGPRsForWavesPerEU, Ctx); if (MD->getPALMajorVersion() < 3) { - MD->setRsrc1(CC, CurrentProgramInfo.getPGMRSrc1(CC, STM)); + MD->setRsrc1(CC, CurrentProgramInfo.getPGMRSrc1(CC, STM, Ctx), Ctx); if (AMDGPU::isCompute(CC)) { - MD->setRsrc2(CC, CurrentProgramInfo.getComputePGMRSrc2()); + MD->setRsrc2(CC, CurrentProgramInfo.getComputePGMRSrc2(Ctx), Ctx); } else { - if (getMCExprValue(CurrentProgramInfo.ScratchBlocks, Ctx) > 0) - MD->setRsrc2(CC, S_00B84C_SCRATCH_EN(1)); + const MCExpr *HasScratchBlocks = + MCBinaryExpr::createGT(CurrentProgramInfo.ScratchBlocks, + MCConstantExpr::create(0, Ctx), Ctx); + auto [Shift, Mask] = getShiftMask(C_00B84C_SCRATCH_EN); + MD->setRsrc2(CC, maskShiftSet(HasScratchBlocks, Mask, Shift, Ctx), Ctx); } } else { MD->setHwStage(CC, ".debug_mode", (bool)CurrentProgramInfo.DebugMode); - MD->setHwStage(CC, ".scratch_en", - (bool)getMCExprValue(CurrentProgramInfo.ScratchEnable, Ctx)); + MD->setHwStage(CC, ".scratch_en", msgpack::Type::Boolean, + CurrentProgramInfo.ScratchEnable); EmitPALMetadataCommon(MD, CurrentProgramInfo, CC, STM); } // ScratchSize is in bytes, 16 aligned. MD->setScratchSize( - CC, alignTo(getMCExprValue(CurrentProgramInfo.ScratchSize, Ctx), 16)); + CC, + AMDGPUVariadicMCExpr::createAlignTo(CurrentProgramInfo.ScratchSize, + MCConstantExpr::create(16, Ctx), Ctx), + Ctx); + if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS) { unsigned ExtraLDSSize = STM.getGeneration() >= AMDGPUSubtarget::GFX11 ? divideCeil(CurrentProgramInfo.LDSBlocks, 2) : CurrentProgramInfo.LDSBlocks; if (MD->getPALMajorVersion() < 3) { - MD->setRsrc2(CC, S_00B02C_EXTRA_LDS_SIZE(ExtraLDSSize)); + MD->setRsrc2( + CC, + MCConstantExpr::create(S_00B02C_EXTRA_LDS_SIZE(ExtraLDSSize), Ctx), + Ctx); MD->setSpiPsInputEna(MFI->getPSInputEnable()); MD->setSpiPsInputAddr(MFI->getPSInputAddr()); } else { @@ -1315,20 +1324,19 @@ void AMDGPUAsmPrinter::emitPALFunctionMetadata(const MachineFunction &MF) { if (MD->getPALMajorVersion() < 3) { // Set compute registers - MD->setRsrc1(CallingConv::AMDGPU_CS, - CurrentProgramInfo.getPGMRSrc1(CallingConv::AMDGPU_CS, ST)); + MD->setRsrc1( + CallingConv::AMDGPU_CS, + CurrentProgramInfo.getPGMRSrc1(CallingConv::AMDGPU_CS, ST, Ctx), Ctx); MD->setRsrc2(CallingConv::AMDGPU_CS, - CurrentProgramInfo.getComputePGMRSrc2()); + CurrentProgramInfo.getComputePGMRSrc2(Ctx), Ctx); } else { EmitPALMetadataCommon(MD, CurrentProgramInfo, CallingConv::AMDGPU_CS, ST); } // Set optional info MD->setFunctionLdsSize(FnName, CurrentProgramInfo.LDSSize); - MD->setFunctionNumUsedVgprs( - FnName, getMCExprValue(CurrentProgramInfo.NumVGPRsForWavesPerEU, Ctx)); - MD->setFunctionNumUsedSgprs( - FnName, getMCExprValue(CurrentProgramInfo.NumSGPRsForWavesPerEU, Ctx)); + MD->setFunctionNumUsedVgprs(FnName, CurrentProgramInfo.NumVGPRsForWavesPerEU); + MD->setFunctionNumUsedSgprs(FnName, CurrentProgramInfo.NumSGPRsForWavesPerEU); } // This is supposed to be log2(Size) diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUDelayedMCExpr.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUDelayedMCExpr.cpp new file mode 100644 index 0000000..a4f4a9e --- /dev/null +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUDelayedMCExpr.cpp @@ -0,0 +1,61 @@ +//===- AMDGPUDelayedMCExpr.cpp - Delayed MCExpr resolve ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "AMDGPUDelayedMCExpr.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCValue.h" + +using namespace llvm; + +static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, + MCValue Val) { + msgpack::Document *Doc = DN.getDocument(); + switch (Type) { + default: + return Doc->getEmptyNode(); + case msgpack::Type::Int: + return Doc->getNode(static_cast<int64_t>(Val.getConstant())); + case msgpack::Type::UInt: + return Doc->getNode(static_cast<uint64_t>(Val.getConstant())); + case msgpack::Type::Boolean: + return Doc->getNode(static_cast<bool>(Val.getConstant())); + } +} + +void DelayedMCExprs::assignDocNode(msgpack::DocNode &DN, msgpack::Type Type, + const MCExpr *ExprValue) { + MCValue Res; + if (ExprValue->evaluateAsRelocatable(Res, nullptr, nullptr)) { + if (Res.isAbsolute()) { + DN = getNode(DN, Type, Res); + return; + } + } + + DelayedExprs.push_back(Expr{DN, Type, ExprValue}); +} + +bool DelayedMCExprs::resolveDelayedExpressions() { + while (!DelayedExprs.empty()) { + Expr DE = DelayedExprs.front(); + MCValue Res; + + if (!DE.ExprValue->evaluateAsRelocatable(Res, nullptr, nullptr) || + !Res.isAbsolute()) + return false; + + DelayedExprs.pop_front(); + DE.DN = getNode(DE.DN, DE.Type, Res); + } + + return true; +} + +void DelayedMCExprs::clear() { DelayedExprs.clear(); } + +bool DelayedMCExprs::empty() { return DelayedExprs.empty(); } diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUDelayedMCExpr.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUDelayedMCExpr.h new file mode 100644 index 0000000..8c9cda3 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUDelayedMCExpr.h @@ -0,0 +1,39 @@ +//===- AMDGPUDelayedMCExpr.h - Delayed MCExpr resolve -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUDELAYEDMCEXPR_H +#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUDELAYEDMCEXPR_H + +#include "llvm/BinaryFormat/MsgPackDocument.h" +#include <deque> + +namespace llvm { +class MCExpr; + +class DelayedMCExprs { + struct Expr { + msgpack::DocNode &DN; + msgpack::Type Type; + const MCExpr *ExprValue; + Expr(msgpack::DocNode &DN, msgpack::Type Type, const MCExpr *ExprValue) + : DN(DN), Type(Type), ExprValue(ExprValue) {} + }; + + std::deque<Expr> DelayedExprs; + +public: + bool resolveDelayedExpressions(); + void assignDocNode(msgpack::DocNode &DN, msgpack::Type Type, + const MCExpr *ExprValue); + void clear(); + bool empty(); +}; + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUDELAYEDMCEXPR_H diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp index 0fa67c5..a53bf70 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp @@ -20,6 +20,7 @@ #include "llvm/BinaryFormat/ELF.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Module.h" +#include "llvm/MC/MCExpr.h" #include "llvm/Support/AMDGPUMetadata.h" #include "llvm/Support/EndianStream.h" @@ -137,12 +138,22 @@ void AMDGPUPALMetadata::setRsrc1(CallingConv::ID CC, unsigned Val) { setRegister(getRsrc1Reg(CC), Val); } +void AMDGPUPALMetadata::setRsrc1(CallingConv::ID CC, const MCExpr *Val, + MCContext &Ctx) { + setRegister(getRsrc1Reg(CC), Val, Ctx); +} + // Set the rsrc2 register in the metadata for a particular shader stage. // In fact this ORs the value into any previous setting of the register. void AMDGPUPALMetadata::setRsrc2(CallingConv::ID CC, unsigned Val) { setRegister(getRsrc1Reg(CC) + 1, Val); } +void AMDGPUPALMetadata::setRsrc2(CallingConv::ID CC, const MCExpr *Val, + MCContext &Ctx) { + setRegister(getRsrc1Reg(CC) + 1, Val, Ctx); +} + // Set the SPI_PS_INPUT_ENA register in the metadata. // In fact this ORs the value into any previous setting of the register. void AMDGPUPALMetadata::setSpiPsInputEna(unsigned Val) { @@ -182,6 +193,40 @@ void AMDGPUPALMetadata::setRegister(unsigned Reg, unsigned Val) { N = N.getDocument()->getNode(Val); } +// Set a register in the metadata. +// In fact this ORs the value into any previous setting of the register. +void AMDGPUPALMetadata::setRegister(unsigned Reg, const MCExpr *Val, + MCContext &Ctx) { + if (!isLegacy()) { + // In the new MsgPack format, ignore register numbered >= 0x10000000. It + // is a PAL ABI pseudo-register in the old non-MsgPack format. + if (Reg >= 0x10000000) + return; + } + auto &N = getRegisters()[MsgPackDoc.getNode(Reg)]; + auto ExprIt = REM.find(Reg); + + if (ExprIt != REM.end()) { + Val = MCBinaryExpr::createOr(Val, ExprIt->getSecond(), Ctx); + // This conditional may be redundant most of the time, but the alternate + // setRegister(unsigned, unsigned) could've been called while the + // conditional returns true (i.e., Reg exists in REM). + if (N.getKind() == msgpack::Type::UInt) { + const MCExpr *NExpr = MCConstantExpr::create(N.getUInt(), Ctx); + Val = MCBinaryExpr::createOr(Val, NExpr, Ctx); + } + ExprIt->getSecond() = Val; + } else if (N.getKind() == msgpack::Type::UInt) { + const MCExpr *NExpr = MCConstantExpr::create(N.getUInt(), Ctx); + Val = MCBinaryExpr::createOr(Val, NExpr, Ctx); + int64_t Unused; + if (!Val->evaluateAsAbsolute(Unused)) + REM[Reg] = Val; + (void)Unused; + } + DelayedExprs.assignDocNode(N, msgpack::Type::UInt, Val); +} + // Set the entry point name for one shader. void AMDGPUPALMetadata::setEntryPoint(unsigned CC, StringRef Name) { if (isLegacy()) @@ -207,11 +252,29 @@ void AMDGPUPALMetadata::setNumUsedVgprs(CallingConv::ID CC, unsigned Val) { getHwStage(CC)[".vgpr_count"] = MsgPackDoc.getNode(Val); } +void AMDGPUPALMetadata::setNumUsedVgprs(CallingConv::ID CC, const MCExpr *Val, + MCContext &Ctx) { + if (isLegacy()) { + // Old non-msgpack format. + unsigned NumUsedVgprsKey = getScratchSizeKey(CC) + + PALMD::Key::VS_NUM_USED_VGPRS - + PALMD::Key::VS_SCRATCH_SIZE; + setRegister(NumUsedVgprsKey, Val, Ctx); + return; + } + // Msgpack format. + setHwStage(CC, ".vgpr_count", msgpack::Type::UInt, Val); +} + // Set the number of used agprs in the metadata. void AMDGPUPALMetadata::setNumUsedAgprs(CallingConv::ID CC, unsigned Val) { getHwStage(CC)[".agpr_count"] = Val; } +void AMDGPUPALMetadata::setNumUsedAgprs(unsigned CC, const MCExpr *Val) { + setHwStage(CC, ".agpr_count", msgpack::Type::UInt, Val); +} + // Set the number of used sgprs in the metadata. This is an optional advisory // record for logging etc; wave dispatch actually uses the rsrc1 register for // the shader stage to determine the number of sgprs to allocate. @@ -228,6 +291,20 @@ void AMDGPUPALMetadata::setNumUsedSgprs(CallingConv::ID CC, unsigned Val) { getHwStage(CC)[".sgpr_count"] = MsgPackDoc.getNode(Val); } +void AMDGPUPALMetadata::setNumUsedSgprs(unsigned CC, const MCExpr *Val, + MCContext &Ctx) { + if (isLegacy()) { + // Old non-msgpack format. + unsigned NumUsedSgprsKey = getScratchSizeKey(CC) + + PALMD::Key::VS_NUM_USED_SGPRS - + PALMD::Key::VS_SCRATCH_SIZE; + setRegister(NumUsedSgprsKey, Val, Ctx); + return; + } + // Msgpack format. + setHwStage(CC, ".sgpr_count", msgpack::Type::UInt, Val); +} + // Set the scratch size in the metadata. void AMDGPUPALMetadata::setScratchSize(CallingConv::ID CC, unsigned Val) { if (isLegacy()) { @@ -239,6 +316,17 @@ void AMDGPUPALMetadata::setScratchSize(CallingConv::ID CC, unsigned Val) { getHwStage(CC)[".scratch_memory_size"] = MsgPackDoc.getNode(Val); } +void AMDGPUPALMetadata::setScratchSize(unsigned CC, const MCExpr *Val, + MCContext &Ctx) { + if (isLegacy()) { + // Old non-msgpack format. + setRegister(getScratchSizeKey(CC), Val, Ctx); + return; + } + // Msgpack format. + setHwStage(CC, ".scratch_memory_size", msgpack::Type::UInt, Val); +} + // Set the stack frame size of a function in the metadata. void AMDGPUPALMetadata::setFunctionScratchSize(StringRef FnName, unsigned Val) { auto Node = getShaderFunction(FnName); @@ -259,6 +347,12 @@ void AMDGPUPALMetadata::setFunctionNumUsedVgprs(StringRef FnName, Node[".vgpr_count"] = MsgPackDoc.getNode(Val); } +void AMDGPUPALMetadata::setFunctionNumUsedVgprs(StringRef FnName, + const MCExpr *Val) { + auto Node = getShaderFunction(FnName); + DelayedExprs.assignDocNode(Node[".vgpr_count"], msgpack::Type::UInt, Val); +} + // Set the number of used vgprs in the metadata. void AMDGPUPALMetadata::setFunctionNumUsedSgprs(StringRef FnName, unsigned Val) { @@ -266,6 +360,12 @@ void AMDGPUPALMetadata::setFunctionNumUsedSgprs(StringRef FnName, Node[".sgpr_count"] = MsgPackDoc.getNode(Val); } +void AMDGPUPALMetadata::setFunctionNumUsedSgprs(StringRef FnName, + const MCExpr *Val) { + auto Node = getShaderFunction(FnName); + DelayedExprs.assignDocNode(Node[".sgpr_count"], msgpack::Type::UInt, Val); +} + // Set the hardware register bit in PAL metadata to enable wave32 on the // shader of the given calling convention. void AMDGPUPALMetadata::setWave32(unsigned CC) { @@ -662,6 +762,7 @@ void AMDGPUPALMetadata::toString(std::string &String) { String.clear(); if (!BlobType) return; + ResolvedAll = DelayedExprs.resolveDelayedExpressions(); raw_string_ostream Stream(String); if (isLegacy()) { if (MsgPackDoc.getRoot().getKind() == msgpack::Type::Nil) @@ -711,6 +812,7 @@ void AMDGPUPALMetadata::toString(std::string &String) { // a .note record of the specified AMD type. Returns an empty blob if // there is no PAL metadata, void AMDGPUPALMetadata::toBlob(unsigned Type, std::string &Blob) { + ResolvedAll = DelayedExprs.resolveDelayedExpressions(); if (Type == ELF::NT_AMD_PAL_METADATA) toLegacyBlob(Blob); else if (Type) @@ -906,11 +1008,17 @@ void AMDGPUPALMetadata::setLegacy() { // Erase all PAL metadata. void AMDGPUPALMetadata::reset() { MsgPackDoc.clear(); + REM.clear(); + DelayedExprs.clear(); Registers = MsgPackDoc.getEmptyNode(); HwStages = MsgPackDoc.getEmptyNode(); ShaderFunctions = MsgPackDoc.getEmptyNode(); } +bool AMDGPUPALMetadata::resolvedAllMCExpr() { + return ResolvedAll && DelayedExprs.empty(); +} + unsigned AMDGPUPALMetadata::getPALVersion(unsigned idx) { assert(idx < 2 && "illegal index to PAL version - should be 0 (major) or 1 (minor)"); @@ -942,6 +1050,11 @@ void AMDGPUPALMetadata::setHwStage(unsigned CC, StringRef field, bool Val) { getHwStage(CC)[field] = Val; } +void AMDGPUPALMetadata::setHwStage(unsigned CC, StringRef field, + msgpack::Type Type, const MCExpr *Val) { + DelayedExprs.assignDocNode(getHwStage(CC)[field], Type, Val); +} + void AMDGPUPALMetadata::setComputeRegisters(StringRef field, unsigned Val) { getComputeRegisters()[field] = Val; } diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h index 158f766..e05532a 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h @@ -13,7 +13,10 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUPALMETADATA_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPUPALMETADATA_H +#include "AMDGPUDelayedMCExpr.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/BinaryFormat/MsgPackDocument.h" +#include "llvm/MC/MCContext.h" namespace llvm { @@ -21,6 +24,10 @@ class Module; class StringRef; class AMDGPUPALMetadata { +public: + using RegisterExprMap = DenseMap<unsigned, const MCExpr *>; + +private: unsigned BlobType = 0; msgpack::Document MsgPackDoc; msgpack::DocNode Registers; @@ -32,6 +39,10 @@ class AMDGPUPALMetadata { msgpack::DocNode ComputeRegisters; msgpack::DocNode GraphicsRegisters; + DelayedMCExprs DelayedExprs; + RegisterExprMap REM; + bool ResolvedAll = true; + public: // Read the amdgpu.pal.metadata supplied by the frontend, ready for // per-function modification. @@ -45,10 +56,12 @@ public: // Set the rsrc1 register in the metadata for a particular shader stage. // In fact this ORs the value into any previous setting of the register. void setRsrc1(unsigned CC, unsigned Val); + void setRsrc1(unsigned CC, const MCExpr *Val, MCContext &Ctx); // Set the rsrc2 register in the metadata for a particular shader stage. // In fact this ORs the value into any previous setting of the register. void setRsrc2(unsigned CC, unsigned Val); + void setRsrc2(unsigned CC, const MCExpr *Val, MCContext &Ctx); // Set the SPI_PS_INPUT_ENA register in the metadata. // In fact this ORs the value into any previous setting of the register. @@ -64,6 +77,7 @@ public: // Set a register in the metadata. // In fact this ORs the value into any previous setting of the register. void setRegister(unsigned Reg, unsigned Val); + void setRegister(unsigned Reg, const MCExpr *Val, MCContext &Ctx); // Set the entry point name for one shader. void setEntryPoint(unsigned CC, StringRef Name); @@ -72,18 +86,22 @@ public: // record for logging etc; wave dispatch actually uses the rsrc1 register for // the shader stage to determine the number of vgprs to allocate. void setNumUsedVgprs(unsigned CC, unsigned Val); + void setNumUsedVgprs(unsigned CC, const MCExpr *Val, MCContext &Ctx); // Set the number of used agprs in the metadata. This is an optional advisory // record for logging etc; void setNumUsedAgprs(unsigned CC, unsigned Val); + void setNumUsedAgprs(unsigned CC, const MCExpr *Val); // Set the number of used sgprs in the metadata. This is an optional advisory // record for logging etc; wave dispatch actually uses the rsrc1 register for // the shader stage to determine the number of sgprs to allocate. void setNumUsedSgprs(unsigned CC, unsigned Val); + void setNumUsedSgprs(unsigned CC, const MCExpr *Val, MCContext &Ctx); // Set the scratch size in the metadata. void setScratchSize(unsigned CC, unsigned Val); + void setScratchSize(unsigned CC, const MCExpr *Val, MCContext &Ctx); // Set the stack frame size of a function in the metadata. void setFunctionScratchSize(StringRef FnName, unsigned Val); @@ -97,11 +115,13 @@ public: // record for logging etc; wave dispatch actually uses the rsrc1 register for // the shader stage to determine the number of vgprs to allocate. void setFunctionNumUsedVgprs(StringRef FnName, unsigned Val); + void setFunctionNumUsedVgprs(StringRef FnName, const MCExpr *Val); // Set the number of used sgprs in the metadata. This is an optional advisory // record for logging etc; wave dispatch actually uses the rsrc1 register for // the shader stage to determine the number of sgprs to allocate. void setFunctionNumUsedSgprs(StringRef FnName, unsigned Val); + void setFunctionNumUsedSgprs(StringRef FnName, const MCExpr *Val); // Set the hardware register bit in PAL metadata to enable wave32 on the // shader of the given calling convention. @@ -138,6 +158,8 @@ public: void setHwStage(unsigned CC, StringRef field, unsigned Val); void setHwStage(unsigned CC, StringRef field, bool Val); + void setHwStage(unsigned CC, StringRef field, msgpack::Type Type, + const MCExpr *Val); void setComputeRegisters(StringRef field, unsigned Val); void setComputeRegisters(StringRef field, bool Val); @@ -156,6 +178,8 @@ public: // Erase all PAL metadata. void reset(); + bool resolvedAllMCExpr(); + private: // Return whether the blob type is legacy PAL metadata. bool isLegacy() const; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp index eaee1a2..720d5a1 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp @@ -14,6 +14,7 @@ #include "AMDKernelCodeT.h" #include "SIDefines.h" #include "Utils/AMDGPUBaseInfo.h" +#include "Utils/SIDefinesUtils.h" #include "llvm/ADT/IndexedMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCContext.h" @@ -220,43 +221,6 @@ static int get_amd_kernel_code_t_FieldIndex(StringRef name) { return map.lookup(name) - 1; // returns -1 if not found } -static constexpr std::pair<unsigned, unsigned> getShiftMask(unsigned Value) { - unsigned Shift = 0; - unsigned Mask = 0; - - Mask = ~Value; - for (; !(Mask & 1); Shift++, Mask >>= 1) { - } - - return std::make_pair(Shift, Mask); -} - -static const MCExpr *MaskShiftSet(const MCExpr *Val, uint32_t Mask, - uint32_t Shift, MCContext &Ctx) { - if (Mask) { - const MCExpr *MaskExpr = MCConstantExpr::create(Mask, Ctx); - Val = MCBinaryExpr::createAnd(Val, MaskExpr, Ctx); - } - if (Shift) { - const MCExpr *ShiftExpr = MCConstantExpr::create(Shift, Ctx); - Val = MCBinaryExpr::createShl(Val, ShiftExpr, Ctx); - } - return Val; -} - -static const MCExpr *MaskShiftGet(const MCExpr *Val, uint32_t Mask, - uint32_t Shift, MCContext &Ctx) { - if (Shift) { - const MCExpr *ShiftExpr = MCConstantExpr::create(Shift, Ctx); - Val = MCBinaryExpr::createLShr(Val, ShiftExpr, Ctx); - } - if (Mask) { - const MCExpr *MaskExpr = MCConstantExpr::create(Mask, Ctx); - Val = MCBinaryExpr::createAnd(Val, MaskExpr, Ctx); - } - return Val; -} - class PrintField { public: template <typename T, T AMDGPUMCKernelCodeT::*ptr, @@ -305,10 +269,10 @@ static ArrayRef<PrintFx> getPrinterTable() { const MCExpr *Value; \ if (PGMType == 0) { \ Value = \ - MaskShiftGet(C.compute_pgm_resource1_registers, Mask, Shift, Ctx); \ + maskShiftGet(C.compute_pgm_resource1_registers, Mask, Shift, Ctx); \ } else { \ Value = \ - MaskShiftGet(C.compute_pgm_resource2_registers, Mask, Shift, Ctx); \ + maskShiftGet(C.compute_pgm_resource2_registers, Mask, Shift, Ctx); \ } \ int64_t Val; \ if (Value->evaluateAsAbsolute(Val)) \ @@ -392,7 +356,7 @@ static ArrayRef<ParseFx> getParserTable() { if (!parseExpr(MCParser, Value, Err)) \ return false; \ auto [Shift, Mask] = getShiftMask(Complement); \ - Value = MaskShiftSet(Value, Mask, Shift, Ctx); \ + Value = maskShiftSet(Value, Mask, Shift, Ctx); \ const MCExpr *Compl = MCConstantExpr::create(Complement, Ctx); \ if (PGMType == 0) { \ C.compute_pgm_resource1_registers = MCBinaryExpr::createAnd( \ @@ -542,7 +506,7 @@ void AMDGPUMCKernelCodeT::EmitKernelCodeT(MCStreamer &OS, MCContext &Ctx) { const MCExpr *CodeProps = MCConstantExpr::create(code_properties, Ctx); CodeProps = MCBinaryExpr::createOr( CodeProps, - MaskShiftSet(is_dynamic_callstack, + maskShiftSet(is_dynamic_callstack, (1 << AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_WIDTH) - 1, AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_SHIFT, Ctx), Ctx); diff --git a/llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt b/llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt index 2f4ce8e..09b8da9 100644 --- a/llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt +++ b/llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt @@ -1,6 +1,7 @@ add_llvm_component_library(LLVMAMDGPUUtils AMDGPUAsmUtils.cpp AMDGPUBaseInfo.cpp + AMDGPUDelayedMCExpr.cpp AMDGPUMemoryUtils.cpp AMDGPUPALMetadata.cpp AMDKernelCodeTUtils.cpp diff --git a/llvm/lib/Target/AMDGPU/Utils/SIDefinesUtils.h b/llvm/lib/Target/AMDGPU/Utils/SIDefinesUtils.h new file mode 100644 index 0000000..64d21de --- /dev/null +++ b/llvm/lib/Target/AMDGPU/Utils/SIDefinesUtils.h @@ -0,0 +1,79 @@ +//===-- SIDefines.h - SI Helper Functions -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +/// \file - utility functions for the SIDefines and its common uses. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_SIDEFINESUTILS_H +#define LLVM_LIB_TARGET_AMDGPU_UTILS_SIDEFINESUTILS_H + +#include "llvm/MC/MCExpr.h" +#include <utility> + +namespace llvm { +class MCContext; +namespace AMDGPU { + +/// Deduce the least significant bit aligned shift and mask values for a binary +/// Complement \p Value (as they're defined in SIDefines.h as C_*) as a returned +/// pair<shift, mask>. That is to say \p Value == ~(mask << shift) +/// +/// For example, given C_00B848_FWD_PROGRESS (i.e., 0x7FFFFFFF) from +/// SIDefines.h, this will return the pair as (31,1). +constexpr std::pair<unsigned, unsigned> getShiftMask(unsigned Value) { + unsigned Shift = 0; + unsigned Mask = 0; + + Mask = ~Value; + for (; !(Mask & 1); Shift++, Mask >>= 1) { + } + + return std::make_pair(Shift, Mask); +} + +/// Provided with the MCExpr * \p Val, uint32 \p Mask and \p Shift, will return +/// the masked and left shifted, in said order of operations, MCExpr * created +/// within the MCContext \p Ctx. +/// +/// For example, given MCExpr *Val, Mask == 0xf, Shift == 6 the returned MCExpr +/// * will be the equivalent of (Val & 0xf) << 6 +inline const MCExpr *maskShiftSet(const MCExpr *Val, uint32_t Mask, + uint32_t Shift, MCContext &Ctx) { + if (Mask) { + const MCExpr *MaskExpr = MCConstantExpr::create(Mask, Ctx); + Val = MCBinaryExpr::createAnd(Val, MaskExpr, Ctx); + } + if (Shift) { + const MCExpr *ShiftExpr = MCConstantExpr::create(Shift, Ctx); + Val = MCBinaryExpr::createShl(Val, ShiftExpr, Ctx); + } + return Val; +} + +/// Provided with the MCExpr * \p Val, uint32 \p Mask and \p Shift, will return +/// the right shifted and masked, in said order of operations, MCExpr * created +/// within the MCContext \p Ctx. +/// +/// For example, given MCExpr *Val, Mask == 0xf, Shift == 6 the returned MCExpr +/// * will be the equivalent of (Val >> 6) & 0xf +inline const MCExpr *maskShiftGet(const MCExpr *Val, uint32_t Mask, + uint32_t Shift, MCContext &Ctx) { + if (Shift) { + const MCExpr *ShiftExpr = MCConstantExpr::create(Shift, Ctx); + Val = MCBinaryExpr::createLShr(Val, ShiftExpr, Ctx); + } + if (Mask) { + const MCExpr *MaskExpr = MCConstantExpr::create(Mask, Ctx); + Val = MCBinaryExpr::createAnd(Val, MaskExpr, Ctx); + } + return Val; +} + +} // end namespace AMDGPU +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_SIDEFINESUTILS_H diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-es.ll b/llvm/test/CodeGen/AMDGPU/amdpal-es.ll index 679e085..657fe80 100644 --- a/llvm/test/CodeGen/AMDGPU/amdpal-es.ll +++ b/llvm/test/CodeGen/AMDGPU/amdpal-es.ll @@ -11,6 +11,7 @@ ; GCN-NEXT: .scratch_memory_size: 0 ; GCN: .registers: ; GCN-NEXT: '0x2cca (SPI_SHADER_PGM_RSRC1_ES)': 0 +; GCN-NEXT: '0x2ccb (SPI_SHADER_PGM_RSRC2_ES)': 0 ; GCN-NEXT: ... ; GCN-NEXT: .end_amdgpu_pal_metadata define amdgpu_es half @es_amdpal(half %arg0) { diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-gs.ll b/llvm/test/CodeGen/AMDGPU/amdpal-gs.ll index 75f7a1d..9f5eb39 100644 --- a/llvm/test/CodeGen/AMDGPU/amdpal-gs.ll +++ b/llvm/test/CodeGen/AMDGPU/amdpal-gs.ll @@ -12,6 +12,7 @@ ; GCN-NEXT: .scratch_memory_size: 0 ; GCN: .registers: ; GCN-NEXT: '0x2c8a (SPI_SHADER_PGM_RSRC1_GS)': 0 +; GCN-NEXT: '0x2c8b (SPI_SHADER_PGM_RSRC2_GS)': 0 ; GCN-NEXT: ... ; GCN-NEXT: .end_amdgpu_pal_metadata define amdgpu_gs half @gs_amdpal(half %arg0) { diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-hs.ll b/llvm/test/CodeGen/AMDGPU/amdpal-hs.ll index c61578a..7eacedf 100644 --- a/llvm/test/CodeGen/AMDGPU/amdpal-hs.ll +++ b/llvm/test/CodeGen/AMDGPU/amdpal-hs.ll @@ -12,6 +12,7 @@ ; GCN-NEXT: .scratch_memory_size: 0 ; GCN: .registers: ; GCN-NEXT: '0x2d0a (SPI_SHADER_PGM_RSRC1_HS)': 0 +; GCN-NEXT: '0x2d0b (SPI_SHADER_PGM_RSRC2_HS)': 0 ; GCN-NEXT: ... ; GCN-NEXT: .end_amdgpu_pal_metadata define amdgpu_hs half @hs_amdpal(half %arg0) { diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-ls.ll b/llvm/test/CodeGen/AMDGPU/amdpal-ls.ll index 8162c82..973eb56 100644 --- a/llvm/test/CodeGen/AMDGPU/amdpal-ls.ll +++ b/llvm/test/CodeGen/AMDGPU/amdpal-ls.ll @@ -11,6 +11,7 @@ ; GCN-NEXT: .scratch_memory_size: 0 ; GCN: .registers: ; GCN-NEXT: '0x2d4a (SPI_SHADER_PGM_RSRC1_LS)': 0 +; GCN-NEXT: '0x2d4b (SPI_SHADER_PGM_RSRC2_LS)': 0 ; GCN-NEXT: ... ; GCN-NEXT: .end_amdgpu_pal_metadata define amdgpu_ls half @ls_amdpal(half %arg0) { diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-vs.ll b/llvm/test/CodeGen/AMDGPU/amdpal-vs.ll index c300ba1..e554bb8 100644 --- a/llvm/test/CodeGen/AMDGPU/amdpal-vs.ll +++ b/llvm/test/CodeGen/AMDGPU/amdpal-vs.ll @@ -12,6 +12,7 @@ ; GCN-NEXT: .scratch_memory_size: 0 ; GCN: .registers: ; GCN-NEXT: '0x2c4a (SPI_SHADER_PGM_RSRC1_VS)': 0 +; GCN-NEXT: '0x2c4b (SPI_SHADER_PGM_RSRC2_VS)': 0 ; GCN-NEXT: ... ; GCN-NEXT: .end_amdgpu_pal_metadata define amdgpu_vs half @vs_amdpal(half %arg0) { diff --git a/llvm/unittests/Target/AMDGPU/CMakeLists.txt b/llvm/unittests/Target/AMDGPU/CMakeLists.txt index ae50529..e0efb96 100644 --- a/llvm/unittests/Target/AMDGPU/CMakeLists.txt +++ b/llvm/unittests/Target/AMDGPU/CMakeLists.txt @@ -20,4 +20,5 @@ add_llvm_target_unittest(AMDGPUTests AMDGPUUnitTests.cpp DwarfRegMappings.cpp ExecMayBeModifiedBeforeAnyUse.cpp + PALMetadata.cpp ) diff --git a/llvm/unittests/Target/AMDGPU/PALMetadata.cpp b/llvm/unittests/Target/AMDGPU/PALMetadata.cpp new file mode 100644 index 0000000..7d4f383 --- /dev/null +++ b/llvm/unittests/Target/AMDGPU/PALMetadata.cpp @@ -0,0 +1,245 @@ +//===- llvm/unittests/MC/AMDGPU/PALMetadata.cpp ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "AMDGPUTargetMachine.h" +#include "GCNSubtarget.h" +#include "SIProgramInfo.h" +#include "Utils/AMDGPUPALMetadata.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCTargetOptions.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Target/TargetMachine.h" +#include "gtest/gtest.h" + +using namespace llvm; + +class PALMetadata : public testing::Test { +protected: + std::unique_ptr<GCNTargetMachine> TM; + std::unique_ptr<LLVMContext> Ctx; + std::unique_ptr<GCNSubtarget> ST; + std::unique_ptr<MachineModuleInfo> MMI; + std::unique_ptr<MachineFunction> MF; + std::unique_ptr<Module> M; + AMDGPUPALMetadata MD; + + static void SetUpTestSuite() { + LLVMInitializeAMDGPUTargetInfo(); + LLVMInitializeAMDGPUTarget(); + LLVMInitializeAMDGPUTargetMC(); + } + + PALMetadata() { + StringRef Triple = "amdgcn--amdpal"; + StringRef CPU = "gfx1010"; + StringRef FS = ""; + + std::string Error; + const Target *TheTarget = TargetRegistry::lookupTarget(Triple, Error); + TargetOptions Options; + + TM.reset(static_cast<GCNTargetMachine *>(TheTarget->createTargetMachine( + Triple, CPU, FS, Options, std::nullopt, std::nullopt))); + + Ctx = std::make_unique<LLVMContext>(); + M = std::make_unique<Module>("Module", *Ctx); + M->setDataLayout(TM->createDataLayout()); + auto *FType = FunctionType::get(Type::getVoidTy(*Ctx), false); + auto *F = Function::Create(FType, GlobalValue::ExternalLinkage, "Test", *M); + MMI = std::make_unique<MachineModuleInfo>(TM.get()); + + ST = std::make_unique<GCNSubtarget>(TM->getTargetTriple(), + TM->getTargetCPU(), + TM->getTargetFeatureString(), *TM); + + MF = std::make_unique<MachineFunction>(*F, *TM, *ST, 1, *MMI); + } +}; + +TEST_F(PALMetadata, ResourceRegisterSetORsResolvableUnknown) { + StringRef yaml = "---\n" + "amdpal.pipelines:\n" + " - .hardware_stages:\n" + " .es:\n" + " .entry_point: Test\n" + " .scratch_memory_size: 0\n" + " .sgpr_count: 0x1\n" + " .vgpr_count: 0x1\n" + " .registers:\n" + " \'0x2c4a (SPI_SHADER_PGM_RSRC1_VS)\': 0x2f0000\n" + " \'0x2c4b (SPI_SHADER_PGM_RSRC2_VS)\': 0\n" + "...\n"; + + MCContext &MCCtx = MF->getContext(); + auto CC = CallingConv::AMDGPU_VS; + MD.setFromString(yaml); + MD.setRsrc2(CC, MCConstantExpr::create(42, MCCtx), MCCtx); + MCSymbol *Sym = MCCtx.getOrCreateSymbol("Unknown"); + MD.setRsrc2(CC, MCSymbolRefExpr::create(Sym, MCCtx), MCCtx); + EXPECT_FALSE(MD.resolvedAllMCExpr()); + + MD.setRsrc2(CC, MCConstantExpr::create(0xff00, MCCtx), MCCtx); + Sym->setVariableValue(MCConstantExpr::create(0xffff0000, MCCtx)); + std::string Output; + MD.toString(Output); + + EXPECT_TRUE(MD.resolvedAllMCExpr()); + + auto n = Output.find("\'0x2c4b (SPI_SHADER_PGM_RSRC2_VS)\': 0xffffff2a"); + EXPECT_TRUE(n != std::string::npos); +} + +TEST_F(PALMetadata, ResourceRegisterSetORsResolvableUnknowns) { + StringRef yaml = "---\n" + "amdpal.pipelines:\n" + " - .hardware_stages:\n" + " .es:\n" + " .entry_point: Test\n" + " .scratch_memory_size: 0\n" + " .sgpr_count: 0x1\n" + " .vgpr_count: 0x1\n" + " .registers:\n" + " \'0x2c4a (SPI_SHADER_PGM_RSRC1_VS)\': 0x2f0000\n" + " \'0x2c4b (SPI_SHADER_PGM_RSRC2_VS)\': 0\n" + "...\n"; + + MCContext &MCCtx = MF->getContext(); + auto CC = CallingConv::AMDGPU_VS; + MD.setFromString(yaml); + MCSymbol *SymOne = MCCtx.getOrCreateSymbol("UnknownOne"); + MD.setRsrc2(CC, MCSymbolRefExpr::create(SymOne, MCCtx), MCCtx); + + MD.setRsrc2(CC, MCConstantExpr::create(42, MCCtx), MCCtx); + + MCSymbol *SymTwo = MCCtx.getOrCreateSymbol("UnknownTwo"); + MD.setRsrc2(CC, MCSymbolRefExpr::create(SymTwo, MCCtx), MCCtx); + EXPECT_FALSE(MD.resolvedAllMCExpr()); + + SymOne->setVariableValue(MCConstantExpr::create(0xffff0000, MCCtx)); + SymTwo->setVariableValue(MCConstantExpr::create(0x0000ff00, MCCtx)); + + std::string Output; + MD.toString(Output); + + EXPECT_TRUE(MD.resolvedAllMCExpr()); + + auto n = Output.find("\'0x2c4b (SPI_SHADER_PGM_RSRC2_VS)\': 0xffffff2a"); + EXPECT_TRUE(n != std::string::npos); +} + +TEST_F(PALMetadata, ResourceRegisterSetORsPreset) { + StringRef yaml = "---\n" + "amdpal.pipelines:\n" + " - .hardware_stages:\n" + " .es:\n" + " .entry_point: Test\n" + " .scratch_memory_size: 0\n" + " .sgpr_count: 0x1\n" + " .vgpr_count: 0x1\n" + " .registers:\n" + " \'0x2c4a (SPI_SHADER_PGM_RSRC1_VS)\': 0x2f0000\n" + " \'0x2c4b (SPI_SHADER_PGM_RSRC2_VS)\': 0x2a\n" + "...\n"; + + MCContext &MCCtx = MF->getContext(); + auto CC = CallingConv::AMDGPU_VS; + MD.setFromString(yaml); + MCSymbol *Sym = MCCtx.getOrCreateSymbol("Unknown"); + MD.setRsrc2(CC, MCSymbolRefExpr::create(Sym, MCCtx), MCCtx); + MD.setRsrc2(CC, MCConstantExpr::create(0xff00, MCCtx), MCCtx); + Sym->setVariableValue(MCConstantExpr::create(0xffff0000, MCCtx)); + std::string Output; + MD.toString(Output); + + auto n = Output.find("\'0x2c4b (SPI_SHADER_PGM_RSRC2_VS)\': 0xffffff2a"); + EXPECT_TRUE(n != std::string::npos); +} + +TEST_F(PALMetadata, ResourceRegisterSetORs) { + StringRef yaml = "---\n" + "amdpal.pipelines:\n" + " - .hardware_stages:\n" + " .es:\n" + " .entry_point: Test\n" + " .scratch_memory_size: 0\n" + " .sgpr_count: 0x1\n" + " .vgpr_count: 0x1\n" + " .registers:\n" + " \'0x2c4a (SPI_SHADER_PGM_RSRC1_VS)\': 0x2f0000\n" + " \'0x2c4b (SPI_SHADER_PGM_RSRC2_VS)\': 0\n" + "...\n"; + + MCContext &MCCtx = MF->getContext(); + auto CC = CallingConv::AMDGPU_VS; + MD.setFromString(yaml); + MCSymbol *Sym = MCCtx.getOrCreateSymbol("Unknown"); + MD.setRsrc2(CC, MCSymbolRefExpr::create(Sym, MCCtx), MCCtx); + MD.setRsrc2(CC, 42); + MD.setRsrc2(CC, MCConstantExpr::create(0xff00, MCCtx), MCCtx); + Sym->setVariableValue(MCConstantExpr::create(0xffff0000, MCCtx)); + std::string Output; + MD.toString(Output); + + auto n = Output.find("\'0x2c4b (SPI_SHADER_PGM_RSRC2_VS)\': 0xffffff2a"); + EXPECT_TRUE(n != std::string::npos); +} + +TEST_F(PALMetadata, ResourceRegisterSetUnresolvedSym) { + StringRef yaml = "---\n" + "amdpal.pipelines:\n" + " - .hardware_stages:\n" + " .es:\n" + " .entry_point: Test\n" + " .scratch_memory_size: 0\n" + " .sgpr_count: 0x1\n" + " .vgpr_count: 0x1\n" + " .registers:\n" + " \'0x2c4a (SPI_SHADER_PGM_RSRC1_VS)\': 0x2f0000\n" + " \'0x2c4b (SPI_SHADER_PGM_RSRC2_VS)\': 0\n" + "...\n"; + + MCContext &MCCtx = MF->getContext(); + auto CC = CallingConv::AMDGPU_VS; + MD.setFromString(yaml); + MCSymbol *Sym = MCCtx.getOrCreateSymbol("Unknown"); + MD.setRsrc2(CC, MCSymbolRefExpr::create(Sym, MCCtx), MCCtx); + MD.setRsrc2(CC, MCConstantExpr::create(0xff00, MCCtx), MCCtx); + std::string Output; + + MD.toString(Output); + EXPECT_FALSE(MD.resolvedAllMCExpr()); +} + +TEST_F(PALMetadata, ResourceRegisterSetNoEmitUnresolved) { + StringRef yaml = "---\n" + "amdpal.pipelines:\n" + " - .hardware_stages:\n" + " .es:\n" + " .entry_point: Test\n" + " .scratch_memory_size: 0\n" + " .sgpr_count: 0x1\n" + " .vgpr_count: 0x1\n" + " .registers:\n" + " \'0x2c4a (SPI_SHADER_PGM_RSRC1_VS)\': 0x2f0000\n" + " \'0x2c4b (SPI_SHADER_PGM_RSRC2_VS)\': 0\n" + "...\n"; + + MCContext &MCCtx = MF->getContext(); + auto CC = CallingConv::AMDGPU_VS; + MD.setFromString(yaml); + MCSymbol *Sym = MCCtx.getOrCreateSymbol("Unknown"); + MD.setRsrc2(CC, MCSymbolRefExpr::create(Sym, MCCtx), MCCtx); + MD.setRsrc2(CC, MCConstantExpr::create(0xff00, MCCtx), MCCtx); + + EXPECT_FALSE(MD.resolvedAllMCExpr()); +} |