aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/DirectX
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/DirectX')
-rw-r--r--llvm/lib/Target/DirectX/DXContainerGlobals.cpp15
-rw-r--r--llvm/lib/Target/DirectX/DXIL.td27
-rw-r--r--llvm/lib/Target/DirectX/DXILCBufferAccess.cpp310
-rw-r--r--llvm/lib/Target/DirectX/DXILDataScalarization.cpp95
-rw-r--r--llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp4
-rw-r--r--llvm/lib/Target/DirectX/DXILOpLowering.cpp2
-rw-r--r--llvm/lib/Target/DirectX/DXILResourceAccess.cpp156
-rw-r--r--llvm/lib/Target/DirectX/DXILRootSignature.h1
-rw-r--r--llvm/lib/Target/DirectX/DXILShaderFlags.cpp29
-rw-r--r--llvm/lib/Target/DirectX/DXILShaderFlags.h1
-rw-r--r--llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp66
-rw-r--r--llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp4
-rw-r--r--llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.h2
-rw-r--r--llvm/lib/Target/DirectX/DirectX.td2
-rw-r--r--llvm/lib/Target/DirectX/DirectXInstrInfo.cpp2
-rw-r--r--llvm/lib/Target/DirectX/DirectXTargetMachine.cpp4
-rw-r--r--llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp10
17 files changed, 359 insertions, 371 deletions
diff --git a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp
index 8ace2d2..95577dd 100644
--- a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp
+++ b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp
@@ -29,7 +29,6 @@
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <cstdint>
-#include <optional>
using namespace llvm;
using namespace llvm::dxil;
@@ -194,9 +193,10 @@ void DXContainerGlobals::addResourcesForPSV(Module &M, PSVRuntimeInfo &PSV) {
dxbc::PSV::v2::ResourceBindInfo BindInfo;
BindInfo.Type = Type;
BindInfo.LowerBound = Binding.LowerBound;
- assert(Binding.Size == UINT32_MAX ||
- (uint64_t)Binding.LowerBound + Binding.Size - 1 <= UINT32_MAX &&
- "Resource range is too large");
+ assert(
+ (Binding.Size == UINT32_MAX ||
+ (uint64_t)Binding.LowerBound + Binding.Size - 1 <= UINT32_MAX) &&
+ "Resource range is too large");
BindInfo.UpperBound = (Binding.Size == UINT32_MAX)
? UINT32_MAX
: Binding.LowerBound + Binding.Size - 1;
@@ -284,6 +284,13 @@ void DXContainerGlobals::addPipelineStateValidationInfo(
PSV.BaseData.NumThreadsX = MMI.EntryPropertyVec[0].NumThreadsX;
PSV.BaseData.NumThreadsY = MMI.EntryPropertyVec[0].NumThreadsY;
PSV.BaseData.NumThreadsZ = MMI.EntryPropertyVec[0].NumThreadsZ;
+ if (MMI.EntryPropertyVec[0].WaveSizeMin) {
+ PSV.BaseData.MinimumWaveLaneCount = MMI.EntryPropertyVec[0].WaveSizeMin;
+ PSV.BaseData.MaximumWaveLaneCount =
+ MMI.EntryPropertyVec[0].WaveSizeMax
+ ? MMI.EntryPropertyVec[0].WaveSizeMax
+ : MMI.EntryPropertyVec[0].WaveSizeMin;
+ }
break;
default:
break;
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 7ae500a..8b286626 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -930,6 +930,24 @@ def Discard : DXILOp<82, discard> {
let stages = [Stages<DXIL1_0, [pixel]>];
}
+def DerivCoarseX : DXILOp<83, unary> { // x-direction counterpart of DerivCoarseY
+ let Doc = "computes the rate of change per stamp in x direction";
+ let intrinsics = [IntrinSelect<int_dx_ddx_coarse>];
+ let arguments = [OverloadTy];
+ let result = OverloadTy; // result uses the same overload type as the argument
+ let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
+ let stages = [Stages<DXIL1_0, [library, pixel]>];
+}
+
+def DerivCoarseY : DXILOp<84, unary> { // y-direction counterpart of DerivCoarseX
+ let Doc = "computes the rate of change per stamp in y direction";
+ let intrinsics = [IntrinSelect<int_dx_ddy_coarse>];
+ let arguments = [OverloadTy];
+ let result = OverloadTy; // result uses the same overload type as the argument
+ let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
+ let stages = [Stages<DXIL1_0, [library, pixel]>];
+}
+
def ThreadId : DXILOp<93, threadId> {
let Doc = "Reads the thread ID";
let intrinsics = [IntrinSelect<int_dx_thread_id>];
@@ -1079,6 +1097,15 @@ def WaveActiveOp : DXILOp<119, waveActiveOp> {
let attributes = [Attributes<DXIL1_0, []>];
}
+def LegacyF16ToF32 : DXILOp<131, legacyF16ToF32> {
+ let Doc = "returns the float16 stored in the low-half of the uint converted "
+ "to a float";
+ let intrinsics = [IntrinSelect<int_dx_legacyf16tof32>];
+ let arguments = [Int32Ty];
+ let result = FloatTy; // no `overloads` field: the signature is fixed Int32Ty -> FloatTy
+ let stages = [Stages<DXIL1_0, [all_stages]>];
+}
+
def WaveAllBitCount : DXILOp<135, waveAllOp> {
let Doc = "returns the count of bits set to 1 across the wave";
let intrinsics = [IntrinSelect<int_dx_wave_active_countbits>];
diff --git a/llvm/lib/Target/DirectX/DXILCBufferAccess.cpp b/llvm/lib/Target/DirectX/DXILCBufferAccess.cpp
index 4427797..5624532 100644
--- a/llvm/lib/Target/DirectX/DXILCBufferAccess.cpp
+++ b/llvm/lib/Target/DirectX/DXILCBufferAccess.cpp
@@ -8,11 +8,13 @@
#include "DXILCBufferAccess.h"
#include "DirectX.h"
+#include "llvm/Analysis/DXILResource.h"
#include "llvm/Frontend/HLSL/CBuffer.h"
#include "llvm/Frontend/HLSL/HLSLResource.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsDirectX.h"
+#include "llvm/IR/ReplaceConstant.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/FormatVariadic.h"
@@ -21,297 +23,41 @@
#define DEBUG_TYPE "dxil-cbuffer-access"
using namespace llvm;
-namespace {
-/// Helper for building a `load.cbufferrow` intrinsic given a simple type.
-struct CBufferRowIntrin {
- Intrinsic::ID IID;
- Type *RetTy;
- unsigned int EltSize;
- unsigned int NumElts;
-
- CBufferRowIntrin(const DataLayout &DL, Type *Ty) {
- assert(Ty == Ty->getScalarType() && "Expected scalar type");
-
- switch (DL.getTypeSizeInBits(Ty)) {
- case 16:
- IID = Intrinsic::dx_resource_load_cbufferrow_8;
- RetTy = StructType::get(Ty, Ty, Ty, Ty, Ty, Ty, Ty, Ty);
- EltSize = 2;
- NumElts = 8;
- break;
- case 32:
- IID = Intrinsic::dx_resource_load_cbufferrow_4;
- RetTy = StructType::get(Ty, Ty, Ty, Ty);
- EltSize = 4;
- NumElts = 4;
- break;
- case 64:
- IID = Intrinsic::dx_resource_load_cbufferrow_2;
- RetTy = StructType::get(Ty, Ty);
- EltSize = 8;
- NumElts = 2;
- break;
- default:
- llvm_unreachable("Only 16, 32, and 64 bit types supported");
- }
- }
-};
-
-// Helper for creating CBuffer handles and loading data from them
-struct CBufferResource {
- GlobalVariable *GVHandle;
- GlobalVariable *Member;
- size_t MemberOffset;
-
- LoadInst *Handle;
-
- CBufferResource(GlobalVariable *GVHandle, GlobalVariable *Member,
- size_t MemberOffset)
- : GVHandle(GVHandle), Member(Member), MemberOffset(MemberOffset) {}
-
- const DataLayout &getDataLayout() { return GVHandle->getDataLayout(); }
- Type *getValueType() { return Member->getValueType(); }
- iterator_range<ConstantDataSequential::user_iterator> users() {
- return Member->users();
- }
-
- /// Get the byte offset of a Pointer-typed Value * `Val` relative to Member.
- /// `Val` can either be Member itself, or a GEP of a constant offset from
- /// Member
- size_t getOffsetForCBufferGEP(Value *Val) {
- assert(isa<PointerType>(Val->getType()) &&
- "Expected a pointer-typed value");
-
- if (Val == Member)
- return 0;
-
- if (auto *GEP = dyn_cast<GEPOperator>(Val)) {
- // Since we should always have a constant offset, we should only ever have
- // a single GEP of indirection from the Global.
- assert(GEP->getPointerOperand() == Member &&
- "Indirect access to resource handle");
-
- const DataLayout &DL = getDataLayout();
- APInt ConstantOffset(DL.getIndexTypeSizeInBits(GEP->getType()), 0);
- bool Success = GEP->accumulateConstantOffset(DL, ConstantOffset);
- (void)Success;
- assert(Success && "Offsets into cbuffer globals must be constant");
-
- if (auto *ATy = dyn_cast<ArrayType>(Member->getValueType()))
- ConstantOffset =
- hlsl::translateCBufArrayOffset(DL, ConstantOffset, ATy);
-
- return ConstantOffset.getZExtValue();
- }
-
- llvm_unreachable("Expected Val to be a GlobalVariable or GEP");
- }
-
- /// Create a handle for this cbuffer resource using the IRBuilder `Builder`
- /// and sets the handle as the current one to use for subsequent calls to
- /// `loadValue`
- void createAndSetCurrentHandle(IRBuilder<> &Builder) {
- Handle = Builder.CreateLoad(GVHandle->getValueType(), GVHandle,
- GVHandle->getName());
+static void replaceUsersOfGlobal(GlobalVariable *Global,
+ GlobalVariable *HandleGV, size_t Offset) {
+ for (Use &U : make_early_inc_range(Global->uses())) {
+ auto UseInst = dyn_cast<Instruction>(U.getUser());
+ // TODO: Constants? Metadata?
+ assert(UseInst && "Non-instruction use of cbuffer");
+
+ IRBuilder<> Builder(UseInst);
+ LoadInst *Handle = Builder.CreateLoad(HandleGV->getValueType(), HandleGV,
+ HandleGV->getName());
+ Value *Ptr = Builder.CreateIntrinsic(
+ Global->getType(), Intrinsic::dx_resource_getpointer,
+ ArrayRef<Value *>{Handle,
+ ConstantInt::get(Builder.getInt32Ty(), Offset)});
+ U.set(Ptr);
}
- /// Load a value of type `Ty` at offset `Offset` using the handle from the
- /// last call to `createAndSetCurrentHandle`
- Value *loadValue(IRBuilder<> &Builder, Type *Ty, size_t Offset,
- const Twine &Name = "") {
- assert(Handle &&
- "Expected a handle for this cbuffer global resource to be created "
- "before loading a value from it");
- const DataLayout &DL = getDataLayout();
-
- size_t TargetOffset = MemberOffset + Offset;
- CBufferRowIntrin Intrin(DL, Ty->getScalarType());
- // The cbuffer consists of some number of 16-byte rows.
- unsigned int CurrentRow = TargetOffset / hlsl::CBufferRowSizeInBytes;
- unsigned int CurrentIndex =
- (TargetOffset % hlsl::CBufferRowSizeInBytes) / Intrin.EltSize;
-
- auto *CBufLoad = Builder.CreateIntrinsic(
- Intrin.RetTy, Intrin.IID,
- {Handle, ConstantInt::get(Builder.getInt32Ty(), CurrentRow)}, nullptr,
- Name + ".load");
- auto *Elt = Builder.CreateExtractValue(CBufLoad, {CurrentIndex++},
- Name + ".extract");
-
- Value *Result = nullptr;
- unsigned int Remaining =
- ((DL.getTypeSizeInBits(Ty) / 8) / Intrin.EltSize) - 1;
-
- if (Remaining == 0) {
- // We only have a single element, so we're done.
- Result = Elt;
-
- // However, if we loaded a <1 x T>, then we need to adjust the type here.
- if (auto *VT = dyn_cast<FixedVectorType>(Ty)) {
- assert(VT->getNumElements() == 1 &&
- "Can't have multiple elements here");
- Result = Builder.CreateInsertElement(PoisonValue::get(VT), Result,
- Builder.getInt32(0), Name);
- }
- return Result;
- }
-
- // Walk each element and extract it, wrapping to new rows as needed.
- SmallVector<Value *> Extracts{Elt};
- while (Remaining--) {
- CurrentIndex %= Intrin.NumElts;
-
- if (CurrentIndex == 0)
- CBufLoad = Builder.CreateIntrinsic(
- Intrin.RetTy, Intrin.IID,
- {Handle, ConstantInt::get(Builder.getInt32Ty(), ++CurrentRow)},
- nullptr, Name + ".load");
-
- Extracts.push_back(Builder.CreateExtractValue(CBufLoad, {CurrentIndex++},
- Name + ".extract"));
- }
-
- // Finally, we build up the original loaded value.
- Result = PoisonValue::get(Ty);
- for (int I = 0, E = Extracts.size(); I < E; ++I)
- Result =
- Builder.CreateInsertElement(Result, Extracts[I], Builder.getInt32(I),
- Name + formatv(".upto{}", I));
- return Result;
- }
-};
-
-} // namespace
-
-/// Replace load via cbuffer global with a load from the cbuffer handle itself.
-static void replaceLoad(LoadInst *LI, CBufferResource &CBR,
- SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
- size_t Offset = CBR.getOffsetForCBufferGEP(LI->getPointerOperand());
- IRBuilder<> Builder(LI);
- CBR.createAndSetCurrentHandle(Builder);
- Value *Result = CBR.loadValue(Builder, LI->getType(), Offset, LI->getName());
- LI->replaceAllUsesWith(Result);
- DeadInsts.push_back(LI);
-}
-
-/// This function recursively copies N array elements from the cbuffer resource
-/// CBR to the MemCpy Destination. Recursion is used to unravel multidimensional
-/// arrays into a sequence of scalar/vector extracts and stores.
-static void copyArrayElemsForMemCpy(IRBuilder<> &Builder, MemCpyInst *MCI,
- CBufferResource &CBR, ArrayType *ArrTy,
- size_t ArrOffset, size_t N,
- const Twine &Name = "") {
- const DataLayout &DL = MCI->getDataLayout();
- Type *ElemTy = ArrTy->getElementType();
- size_t ElemTySize = DL.getTypeAllocSize(ElemTy);
- for (unsigned I = 0; I < N; ++I) {
- size_t Offset = ArrOffset + I * ElemTySize;
-
- // Recursively copy nested arrays
- if (ArrayType *ElemArrTy = dyn_cast<ArrayType>(ElemTy)) {
- copyArrayElemsForMemCpy(Builder, MCI, CBR, ElemArrTy, Offset,
- ElemArrTy->getNumElements(), Name);
- continue;
- }
-
- // Load CBuffer value and store it in Dest
- APInt CBufArrayOffset(
- DL.getIndexTypeSizeInBits(MCI->getSource()->getType()), Offset);
- CBufArrayOffset =
- hlsl::translateCBufArrayOffset(DL, CBufArrayOffset, ArrTy);
- Value *CBufferVal =
- CBR.loadValue(Builder, ElemTy, CBufArrayOffset.getZExtValue(), Name);
- Value *GEP =
- Builder.CreateInBoundsGEP(Builder.getInt8Ty(), MCI->getDest(),
- {Builder.getInt32(Offset)}, Name + ".dest");
- Builder.CreateStore(CBufferVal, GEP, MCI->isVolatile());
- }
-}
-
-/// Replace memcpy from a cbuffer global with a memcpy from the cbuffer handle
-/// itself. Assumes the cbuffer global is an array, and the length of bytes to
-/// copy is divisible by array element allocation size.
-/// The memcpy source must also be a direct cbuffer global reference, not a GEP.
-static void replaceMemCpy(MemCpyInst *MCI, CBufferResource &CBR) {
-
- ArrayType *ArrTy = dyn_cast<ArrayType>(CBR.getValueType());
- assert(ArrTy && "MemCpy lowering is only supported for array types");
-
- // This assumption vastly simplifies the implementation
- if (MCI->getSource() != CBR.Member)
- reportFatalUsageError(
- "Expected MemCpy source to be a cbuffer global variable");
-
- ConstantInt *Length = dyn_cast<ConstantInt>(MCI->getLength());
- uint64_t ByteLength = Length->getZExtValue();
-
- // If length to copy is zero, no memcpy is needed
- if (ByteLength == 0) {
- MCI->eraseFromParent();
- return;
- }
-
- const DataLayout &DL = CBR.getDataLayout();
-
- Type *ElemTy = ArrTy->getElementType();
- size_t ElemSize = DL.getTypeAllocSize(ElemTy);
- assert(ByteLength % ElemSize == 0 &&
- "Length of bytes to MemCpy must be divisible by allocation size of "
- "source/destination array elements");
- size_t ElemsToCpy = ByteLength / ElemSize;
-
- IRBuilder<> Builder(MCI);
- CBR.createAndSetCurrentHandle(Builder);
-
- copyArrayElemsForMemCpy(Builder, MCI, CBR, ArrTy, 0, ElemsToCpy,
- "memcpy." + MCI->getDest()->getName() + "." +
- MCI->getSource()->getName());
-
- MCI->eraseFromParent();
-}
-
-static void replaceAccessesWithHandle(CBufferResource &CBR) {
- SmallVector<WeakTrackingVH> DeadInsts;
-
- SmallVector<User *> ToProcess{CBR.users()};
- while (!ToProcess.empty()) {
- User *Cur = ToProcess.pop_back_val();
-
- // If we have a load instruction, replace the access.
- if (auto *LI = dyn_cast<LoadInst>(Cur)) {
- replaceLoad(LI, CBR, DeadInsts);
- continue;
- }
-
- // If we have a memcpy instruction, replace it with multiple accesses and
- // subsequent stores to the destination
- if (auto *MCI = dyn_cast<MemCpyInst>(Cur)) {
- replaceMemCpy(MCI, CBR);
- continue;
- }
-
- // Otherwise, walk users looking for a load...
- if (isa<GetElementPtrInst>(Cur) || isa<GEPOperator>(Cur)) {
- ToProcess.append(Cur->user_begin(), Cur->user_end());
- continue;
- }
-
- llvm_unreachable("Unexpected user of Global");
- }
- RecursivelyDeleteTriviallyDeadInstructions(DeadInsts);
+ Global->removeFromParent();
}
static bool replaceCBufferAccesses(Module &M) {
- std::optional<hlsl::CBufferMetadata> CBufMD = hlsl::CBufferMetadata::get(M);
+ std::optional<hlsl::CBufferMetadata> CBufMD = hlsl::CBufferMetadata::get(
+ M, [](Type *Ty) { return isa<llvm::dxil::PaddingExtType>(Ty); });
if (!CBufMD)
return false;
+ SmallVector<Constant *> CBufferGlobals;
+ for (const hlsl::CBufferMapping &Mapping : *CBufMD)
+ for (const hlsl::CBufferMember &Member : Mapping.Members)
+ CBufferGlobals.push_back(Member.GV);
+ convertUsersOfConstantsToInstructions(CBufferGlobals);
+
for (const hlsl::CBufferMapping &Mapping : *CBufMD)
- for (const hlsl::CBufferMember &Member : Mapping.Members) {
- CBufferResource CBR(Mapping.Handle, Member.GV, Member.Offset);
- replaceAccessesWithHandle(CBR);
- Member.GV->removeFromParent();
- }
+ for (const hlsl::CBufferMember &Member : Mapping.Members)
+ replaceUsersOfGlobal(Member.GV, Mapping.Handle, Member.Offset);
CBufMD->eraseFromModule();
return true;
diff --git a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
index d507d71..5f18c37 100644
--- a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
+++ b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
@@ -29,20 +29,6 @@ static const int MaxVecSize = 4;
using namespace llvm;
-// Recursively creates an array-like version of a given vector type.
-static Type *equivalentArrayTypeFromVector(Type *T) {
- if (auto *VecTy = dyn_cast<VectorType>(T))
- return ArrayType::get(VecTy->getElementType(),
- dyn_cast<FixedVectorType>(VecTy)->getNumElements());
- if (auto *ArrayTy = dyn_cast<ArrayType>(T)) {
- Type *NewElementType =
- equivalentArrayTypeFromVector(ArrayTy->getElementType());
- return ArrayType::get(NewElementType, ArrayTy->getNumElements());
- }
- // If it's not a vector or array, return the original type.
- return T;
-}
-
class DXILDataScalarizationLegacy : public ModulePass {
public:
@@ -121,12 +107,25 @@ DataScalarizerVisitor::lookupReplacementGlobal(Value *CurrOperand) {
static bool isVectorOrArrayOfVectors(Type *T) {
if (isa<VectorType>(T))
return true;
- if (ArrayType *ArrType = dyn_cast<ArrayType>(T))
- return isa<VectorType>(ArrType->getElementType()) ||
- isVectorOrArrayOfVectors(ArrType->getElementType());
+ if (ArrayType *ArrayTy = dyn_cast<ArrayType>(T))
+ return isVectorOrArrayOfVectors(ArrayTy->getElementType());
return false;
}
+/// Recursively creates an array-like version of a given vector type.
+///
+/// A vector type becomes an array with the same element type and element
+/// count; an array type is rebuilt with its element type converted in turn;
+/// any other type is returned unchanged.
+static Type *equivalentArrayTypeFromVector(Type *T) {
+  if (auto *VecTy = dyn_cast<VectorType>(T)) {
+    // Only fixed-width vectors have an element count that can be used as an
+    // array length. Use cast<> rather than dyn_cast<> so that any other
+    // vector type trips an assertion instead of dereferencing a null pointer.
+    return ArrayType::get(VecTy->getElementType(),
+                          cast<FixedVectorType>(VecTy)->getNumElements());
+  }
+  if (auto *ArrayTy = dyn_cast<ArrayType>(T)) {
+    Type *NewElementType =
+        equivalentArrayTypeFromVector(ArrayTy->getElementType());
+    return ArrayType::get(NewElementType, ArrayTy->getNumElements());
+  }
+  // If it's not a vector or array, return the original type.
+  return T;
+}
+
bool DataScalarizerVisitor::visitAllocaInst(AllocaInst &AI) {
Type *AllocatedType = AI.getAllocatedType();
if (!isVectorOrArrayOfVectors(AllocatedType))
@@ -135,7 +134,7 @@ bool DataScalarizerVisitor::visitAllocaInst(AllocaInst &AI) {
IRBuilder<> Builder(&AI);
Type *NewType = equivalentArrayTypeFromVector(AllocatedType);
AllocaInst *ArrAlloca =
- Builder.CreateAlloca(NewType, nullptr, AI.getName() + ".scalarize");
+ Builder.CreateAlloca(NewType, nullptr, AI.getName() + ".scalarized");
ArrAlloca->setAlignment(AI.getAlign());
AI.replaceAllUsesWith(ArrAlloca);
AI.eraseFromParent();
@@ -303,42 +302,44 @@ bool DataScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) {
bool DataScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
GEPOperator *GOp = cast<GEPOperator>(&GEPI);
Value *PtrOperand = GOp->getPointerOperand();
- Type *NewGEPType = GOp->getSourceElementType();
- bool NeedsTransform = false;
-
- // Unwrap GEP ConstantExprs to find the base operand and element type
- while (auto *CE = dyn_cast<ConstantExpr>(PtrOperand)) {
- if (auto *GEPCE = dyn_cast<GEPOperator>(CE)) {
- GOp = GEPCE;
- PtrOperand = GEPCE->getPointerOperand();
- NewGEPType = GEPCE->getSourceElementType();
- } else
- break;
+ Type *GEPType = GOp->getSourceElementType();
+
+ // Replace a GEP ConstantExpr pointer operand with a GEP instruction so that
+ // it can be visited
+ if (auto *PtrOpGEPCE = dyn_cast<ConstantExpr>(PtrOperand);
+ PtrOpGEPCE && PtrOpGEPCE->getOpcode() == Instruction::GetElementPtr) {
+ GetElementPtrInst *OldGEPI =
+ cast<GetElementPtrInst>(PtrOpGEPCE->getAsInstruction());
+ OldGEPI->insertBefore(GEPI.getIterator());
+
+ IRBuilder<> Builder(&GEPI);
+ SmallVector<Value *> Indices(GEPI.indices());
+ Value *NewGEP =
+ Builder.CreateGEP(GEPI.getSourceElementType(), OldGEPI, Indices,
+ GEPI.getName(), GEPI.getNoWrapFlags());
+ assert(isa<GetElementPtrInst>(NewGEP) &&
+ "Expected newly-created GEP to be an instruction");
+ GetElementPtrInst *NewGEPI = cast<GetElementPtrInst>(NewGEP);
+
+ GEPI.replaceAllUsesWith(NewGEPI);
+ GEPI.eraseFromParent();
+ visitGetElementPtrInst(*OldGEPI);
+ visitGetElementPtrInst(*NewGEPI);
+ return true;
}
- if (GlobalVariable *NewGlobal = lookupReplacementGlobal(PtrOperand)) {
- NewGEPType = NewGlobal->getValueType();
- PtrOperand = NewGlobal;
- NeedsTransform = true;
- } else if (AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrOperand)) {
- Type *AllocatedType = Alloca->getAllocatedType();
- if (isa<ArrayType>(AllocatedType) &&
- AllocatedType != GOp->getResultElementType()) {
- NewGEPType = AllocatedType;
- NeedsTransform = true;
- }
- }
+ Type *NewGEPType = equivalentArrayTypeFromVector(GEPType);
+ Value *NewPtrOperand = PtrOperand;
+ if (GlobalVariable *NewGlobal = lookupReplacementGlobal(PtrOperand))
+ NewPtrOperand = NewGlobal;
+ bool NeedsTransform = NewPtrOperand != PtrOperand || NewGEPType != GEPType;
if (!NeedsTransform)
return false;
- // Keep scalar GEPs scalar; dxil-flatten-arrays will do flattening later
- if (!isa<ArrayType>(GOp->getSourceElementType()))
- NewGEPType = GOp->getSourceElementType();
-
IRBuilder<> Builder(&GEPI);
- SmallVector<Value *, MaxVecSize> Indices(GOp->indices());
- Value *NewGEP = Builder.CreateGEP(NewGEPType, PtrOperand, Indices,
+ SmallVector<Value *, MaxVecSize> Indices(GOp->idx_begin(), GOp->idx_end());
+ Value *NewGEP = Builder.CreateGEP(NewGEPType, NewPtrOperand, Indices,
GOp->getName(), GOp->getNoWrapFlags());
GOp->replaceAllUsesWith(NewGEP);
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index ebb7c26..e0d2dbd 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -197,6 +197,7 @@ static Value *expand16BitIsNormal(CallInst *Orig) {
static bool isIntrinsicExpansion(Function &F) {
switch (F.getIntrinsicID()) {
+ case Intrinsic::assume:
case Intrinsic::abs:
case Intrinsic::atan2:
case Intrinsic::exp:
@@ -988,6 +989,9 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
case Intrinsic::abs:
Result = expandAbs(Orig);
break;
+ case Intrinsic::assume:
+ Orig->eraseFromParent();
+ return true;
case Intrinsic::atan2:
Result = expandAtan2Intrinsic(Orig);
break;
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index 8720460..e46a393 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -904,8 +904,6 @@ public:
case Intrinsic::dx_resource_casthandle:
// NOTE: llvm.dbg.value is supported as is in DXIL.
case Intrinsic::dbg_value:
- // NOTE: llvm.assume is supported as is in DXIL.
- case Intrinsic::assume:
case Intrinsic::not_intrinsic:
if (F.use_empty())
F.eraseFromParent();
diff --git a/llvm/lib/Target/DirectX/DXILResourceAccess.cpp b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
index 6579d34..057d87b 100644
--- a/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
+++ b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
@@ -10,6 +10,7 @@
#include "DirectX.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/DXILResource.h"
+#include "llvm/Frontend/HLSL/HLSLResource.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
@@ -20,6 +21,7 @@
#include "llvm/IR/IntrinsicsDirectX.h"
#include "llvm/IR/User.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Support/FormatVariadic.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#define DEBUG_TYPE "dxil-resource-access"
@@ -44,16 +46,28 @@ static Value *calculateGEPOffset(GetElementPtrInst *GEP, Value *PrevOffset,
APInt ConstantOffset(DL.getIndexTypeSizeInBits(GEP->getType()), 0);
if (GEP->accumulateConstantOffset(DL, ConstantOffset)) {
APInt Scaled = ConstantOffset.udiv(ScalarSize);
- return ConstantInt::get(Type::getInt32Ty(GEP->getContext()), Scaled);
+ return ConstantInt::get(DL.getIndexType(GEP->getType()), Scaled);
}
- auto IndexIt = GEP->idx_begin();
- assert(cast<ConstantInt>(IndexIt)->getZExtValue() == 0 &&
- "GEP is not indexing through pointer");
- ++IndexIt;
- Value *Offset = *IndexIt;
- assert(++IndexIt == GEP->idx_end() && "Too many indices in GEP");
- return Offset;
+ unsigned NumIndices = GEP->getNumIndices();
+
+ // If we have a single index we're indexing into a top level array. This
+ // generally only happens with cbuffers.
+ if (NumIndices == 1)
+ return *GEP->idx_begin();
+
+ // If we have two indices, this should be a simple access through a pointer.
+ if (NumIndices == 2) {
+ auto IndexIt = GEP->idx_begin();
+ assert(cast<ConstantInt>(IndexIt)->getZExtValue() == 0 &&
+ "GEP is not indexing through pointer");
+ ++IndexIt;
+ Value *Offset = *IndexIt;
+ assert(++IndexIt == GEP->idx_end() && "Too many indices in GEP");
+ return Offset;
+ }
+
+ llvm_unreachable("Unhandled GEP structure for resource access");
}
static void createTypedBufferStore(IntrinsicInst *II, StoreInst *SI,
@@ -171,6 +185,127 @@ static void createRawLoad(IntrinsicInst *II, LoadInst *LI, Value *Offset) {
LI->replaceAllUsesWith(V);
}
+namespace {
+/// Picks the `load.cbufferrow` intrinsic variant for a scalar element type and
+/// records the shape of the row that variant returns.
+struct CBufferRowIntrin {
+  Intrinsic::ID IID;
+  Type *RetTy;
+  unsigned int EltSize;
+  unsigned int NumElts;
+
+  CBufferRowIntrin(const DataLayout &DL, Type *Ty) {
+    assert(Ty == Ty->getScalarType() && "Expected scalar type");
+
+    // In every supported case NumElts * EltSize is 16 bytes.
+    const uint64_t BitWidth = DL.getTypeSizeInBits(Ty);
+    if (BitWidth == 16) {
+      NumElts = 8;
+      EltSize = 2;
+      RetTy = StructType::get(Ty, Ty, Ty, Ty, Ty, Ty, Ty, Ty);
+      IID = Intrinsic::dx_resource_load_cbufferrow_8;
+    } else if (BitWidth == 32) {
+      NumElts = 4;
+      EltSize = 4;
+      RetTy = StructType::get(Ty, Ty, Ty, Ty);
+      IID = Intrinsic::dx_resource_load_cbufferrow_4;
+    } else if (BitWidth == 64) {
+      NumElts = 2;
+      EltSize = 8;
+      RetTy = StructType::get(Ty, Ty);
+      IID = Intrinsic::dx_resource_load_cbufferrow_2;
+    } else {
+      llvm_unreachable("Only 16, 32, and 64 bit types supported");
+    }
+  }
+};
+} // namespace
+/// Rewrite load \p LI of cbuffer getpointer \p II as load.cbufferrow calls.
+static void createCBufferLoad(IntrinsicInst *II, LoadInst *LI, Value *Offset,
+ dxil::ResourceTypeInfo &RTI) { // NOTE: RTI is not read in this body
+ const DataLayout &DL = LI->getDataLayout();
+
+ Type *Ty = LI->getType();
+ assert(!isa<StructType>(Ty) && "Structs not handled yet");
+ CBufferRowIntrin Intrin(DL, Ty->getScalarType());
+
+ StringRef Name = LI->getName();
+ Value *Handle = II->getOperand(0); // operand 0 of the getpointer call
+
+ IRBuilder<> Builder(LI);
+
+ ConstantInt *GlobalOffset = dyn_cast<ConstantInt>(II->getOperand(1));
+ assert(GlobalOffset && "CBuffer getpointer index must be constant");
+
+ unsigned int FixedOffset = GlobalOffset->getZExtValue();
+ // A constant Offset is folded into FixedOffset and then cleared, so that
+ // Offset stays non-null only when it is a non-constant value.
+ if (auto *ConstOffset = dyn_cast_if_present<ConstantInt>(Offset)) {
+ FixedOffset += ConstOffset->getZExtValue();
+ Offset = nullptr;
+ }
+
+ Value *CurrentRow = ConstantInt::get(
+ Builder.getInt32Ty(), FixedOffset / hlsl::CBufferRowSizeInBytes);
+ unsigned int CurrentIndex =
+ (FixedOffset % hlsl::CBufferRowSizeInBytes) / Intrin.EltSize;
+
+ assert(!(CurrentIndex && Offset) &&
+ "Dynamic indexing into elements of cbuffer rows is not supported");
+ // At this point if we have a non-constant offset it has to be an array
+ // offset, so we can assume that it's a multiple of the row size.
+ if (Offset)
+ CurrentRow = FixedOffset ? Builder.CreateAdd(CurrentRow, Offset) : Offset;
+
+ auto *CBufLoad = Builder.CreateIntrinsic(
+ Intrin.RetTy, Intrin.IID, {Handle, CurrentRow}, nullptr, Name + ".load");
+ auto *Elt =
+ Builder.CreateExtractValue(CBufLoad, {CurrentIndex++}, Name + ".extract");
+
+ // At this point we've loaded the first scalar of our result, but our original
+ // type may have been a vector. Remaining counts the scalars still to extract.
+ unsigned int Remaining =
+ ((DL.getTypeSizeInBits(Ty) / 8) / Intrin.EltSize) - 1;
+ if (Remaining == 0) {
+ // We only have a single element, so we're done.
+ Value *Result = Elt;
+
+ // However, if we loaded a <1 x T>, then we need to adjust the type.
+ if (auto *VT = dyn_cast<FixedVectorType>(Ty)) {
+ assert(VT->getNumElements() == 1 && "Can't have multiple elements here");
+ Result = Builder.CreateInsertElement(PoisonValue::get(VT), Result,
+ Builder.getInt32(0), Name);
+ }
+ LI->replaceAllUsesWith(Result); // LI itself is not erased in this function
+ return;
+ }
+
+ // Walk each element and extract it, wrapping to new rows as needed.
+ SmallVector<Value *> Extracts{Elt};
+ while (Remaining--) {
+ CurrentIndex %= Intrin.NumElts;
+
+ if (CurrentIndex == 0) { // wrapped past the end of the row: load the next one
+ CurrentRow = Builder.CreateAdd(CurrentRow,
+ ConstantInt::get(Builder.getInt32Ty(), 1));
+ CBufLoad = Builder.CreateIntrinsic(Intrin.RetTy, Intrin.IID,
+ {Handle, CurrentRow}, nullptr,
+ Name + ".load");
+ }
+
+ Extracts.push_back(Builder.CreateExtractValue(CBufLoad, {CurrentIndex++},
+ Name + ".extract"));
+ }
+
+ // Finally, we build up the original loaded value, one insertelement per scalar.
+ Value *Result = PoisonValue::get(Ty);
+ for (int I = 0, E = Extracts.size(); I < E; ++I)
+ Result = Builder.CreateInsertElement(
+ Result, Extracts[I], Builder.getInt32(I), Name + formatv(".upto{}", I));
+ LI->replaceAllUsesWith(Result); // LI itself is not erased in this function
+}
+
static void createLoadIntrinsic(IntrinsicInst *II, LoadInst *LI, Value *Offset,
dxil::ResourceTypeInfo &RTI) {
switch (RTI.getResourceKind()) {
@@ -179,6 +314,8 @@ static void createLoadIntrinsic(IntrinsicInst *II, LoadInst *LI, Value *Offset,
case dxil::ResourceKind::RawBuffer:
case dxil::ResourceKind::StructuredBuffer:
return createRawLoad(II, LI, Offset);
+ case dxil::ResourceKind::CBuffer:
+ return createCBufferLoad(II, LI, Offset, RTI);
case dxil::ResourceKind::Texture1D:
case dxil::ResourceKind::Texture2D:
case dxil::ResourceKind::Texture2DMS:
@@ -190,9 +327,8 @@ static void createLoadIntrinsic(IntrinsicInst *II, LoadInst *LI, Value *Offset,
case dxil::ResourceKind::TextureCubeArray:
case dxil::ResourceKind::FeedbackTexture2D:
case dxil::ResourceKind::FeedbackTexture2DArray:
- case dxil::ResourceKind::CBuffer:
case dxil::ResourceKind::TBuffer:
- // TODO: handle these
+ reportFatalUsageError("Load not yet implemented for resource type");
return;
case dxil::ResourceKind::Sampler:
case dxil::ResourceKind::RTAccelerationStructure:
diff --git a/llvm/lib/Target/DirectX/DXILRootSignature.h b/llvm/lib/Target/DirectX/DXILRootSignature.h
index b990b6c..ec82aa9 100644
--- a/llvm/lib/Target/DirectX/DXILRootSignature.h
+++ b/llvm/lib/Target/DirectX/DXILRootSignature.h
@@ -21,7 +21,6 @@
#include "llvm/IR/PassManager.h"
#include "llvm/MC/DXContainerRootSignature.h"
#include "llvm/Pass.h"
-#include <optional>
namespace llvm {
namespace dxil {
diff --git a/llvm/lib/Target/DirectX/DXILShaderFlags.cpp b/llvm/lib/Target/DirectX/DXILShaderFlags.cpp
index ce6e812..e0049dc 100644
--- a/llvm/lib/Target/DirectX/DXILShaderFlags.cpp
+++ b/llvm/lib/Target/DirectX/DXILShaderFlags.cpp
@@ -100,6 +100,26 @@ static bool checkWaveOps(Intrinsic::ID IID) {
}
}
+// Checks to see if the status bit from a load with status
+// instruction is ever extracted. If it is, the module needs
+// to have the TiledResources shader flag set.
+//
+// \p II must be a dx_resource_load_typedbuffer or dx_resource_load_rawbuffer
+// call. Returns true if any direct user of \p II is an extractvalue whose
+// only index is 1, and false otherwise.
+//
+// NOTE(review): no declaration of this function is visible in this patch;
+// consider giving it internal linkage if nothing else refers to it.
+bool checkIfStatusIsExtracted(const IntrinsicInst &II) {
+  [[maybe_unused]] Intrinsic::ID IID = II.getIntrinsicID();
+  // Group the || explicitly so the message string is attached to the whole
+  // condition rather than only to its second operand.
+  assert((IID == Intrinsic::dx_resource_load_typedbuffer ||
+          IID == Intrinsic::dx_resource_load_rawbuffer) &&
+         "unexpected intrinsic ID");
+  for (const User *U : II.users()) {
+    if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(U)) {
+      // Resource load operations return a {result, status} pair.
+      // Check if we extract the status
+      if (EVI->getNumIndices() == 1 && EVI->getIndices()[0] == 1)
+        return true;
+    }
+  }
+
+  return false;
+}
+
/// Update the shader flags mask based on the given instruction.
/// \param CSF Shader flags mask to update.
/// \param I Instruction to check.
@@ -164,7 +184,7 @@ void ModuleShaderFlags::updateFunctionFlags(ComputedShaderFlags &CSF,
}
}
- if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
+ if (const auto *II = dyn_cast<IntrinsicInst>(&I)) {
switch (II->getIntrinsicID()) {
default:
break;
@@ -192,6 +212,13 @@ void ModuleShaderFlags::updateFunctionFlags(ComputedShaderFlags &CSF,
DRTM[cast<TargetExtType>(II->getArgOperand(0)->getType())];
if (RTI.isTyped())
CSF.TypedUAVLoadAdditionalFormats |= RTI.getTyped().ElementCount > 1;
+ if (!CSF.TiledResources && checkIfStatusIsExtracted(*II))
+ CSF.TiledResources = true;
+ break;
+ }
+ case Intrinsic::dx_resource_load_rawbuffer: {
+ if (!CSF.TiledResources && checkIfStatusIsExtracted(*II))
+ CSF.TiledResources = true;
break;
}
}
diff --git a/llvm/lib/Target/DirectX/DXILShaderFlags.h b/llvm/lib/Target/DirectX/DXILShaderFlags.h
index f94f799..a082057 100644
--- a/llvm/lib/Target/DirectX/DXILShaderFlags.h
+++ b/llvm/lib/Target/DirectX/DXILShaderFlags.h
@@ -22,7 +22,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>
-#include <memory>
namespace llvm {
class Module;
diff --git a/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp b/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp
index cf8b833..e1a472f 100644
--- a/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp
+++ b/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp
@@ -82,6 +82,7 @@ enum class EntryPropsTag {
ASStateTag,
WaveSize,
EntryRootSig,
+ WaveRange = 23,
};
} // namespace
@@ -177,14 +178,15 @@ getTagValueAsMetadata(EntryPropsTag Tag, uint64_t Value, LLVMContext &Ctx) {
case EntryPropsTag::ASStateTag:
case EntryPropsTag::WaveSize:
case EntryPropsTag::EntryRootSig:
+ case EntryPropsTag::WaveRange:
llvm_unreachable("NYI: Unhandled entry property tag");
}
return MDVals;
}
-static MDTuple *
-getEntryPropAsMetadata(const EntryProperties &EP, uint64_t EntryShaderFlags,
- const Triple::EnvironmentType ShaderProfile) {
+static MDTuple *getEntryPropAsMetadata(Module &M, const EntryProperties &EP,
+ uint64_t EntryShaderFlags,
+ const ModuleMetadataInfo &MMDI) {
SmallVector<Metadata *> MDVals;
LLVMContext &Ctx = EP.Entry->getContext();
if (EntryShaderFlags != 0)
@@ -195,12 +197,13 @@ getEntryPropAsMetadata(const EntryProperties &EP, uint64_t EntryShaderFlags,
// FIXME: support more props.
// See https://github.com/llvm/llvm-project/issues/57948.
// Add shader kind for lib entries.
- if (ShaderProfile == Triple::EnvironmentType::Library &&
+ if (MMDI.ShaderProfile == Triple::EnvironmentType::Library &&
EP.ShaderStage != Triple::EnvironmentType::Library)
MDVals.append(getTagValueAsMetadata(EntryPropsTag::ShaderKind,
getShaderStage(EP.ShaderStage), Ctx));
if (EP.ShaderStage == Triple::EnvironmentType::Compute) {
+ // Handle mandatory "hlsl.numthreads"
MDVals.emplace_back(ConstantAsMetadata::get(ConstantInt::get(
Type::getInt32Ty(Ctx), static_cast<int>(EntryPropsTag::NumThreads))));
Metadata *NumThreadVals[] = {ConstantAsMetadata::get(ConstantInt::get(
@@ -210,8 +213,48 @@ getEntryPropAsMetadata(const EntryProperties &EP, uint64_t EntryShaderFlags,
ConstantAsMetadata::get(ConstantInt::get(
Type::getInt32Ty(Ctx), EP.NumThreadsZ))};
MDVals.emplace_back(MDNode::get(Ctx, NumThreadVals));
+
+ // Handle optional "hlsl.wavesize". The fields are optionally represented
+ // if they are non-zero.
+ if (EP.WaveSizeMin != 0) {
+ bool IsWaveRange = VersionTuple(6, 8) <= MMDI.ShaderModelVersion;
+ bool IsWaveSize =
+ !IsWaveRange && VersionTuple(6, 6) <= MMDI.ShaderModelVersion;
+
+ if (!IsWaveRange && !IsWaveSize) {
+ reportError(M, "Shader model 6.6 or greater is required to specify "
+ "the \"hlsl.wavesize\" function attribute");
+ return nullptr;
+ }
+
+ // A range is being specified if EP.WaveSizeMax != 0
+ if (EP.WaveSizeMax && !IsWaveRange) {
+ reportError(
+ M, "Shader model 6.8 or greater is required to specify "
+ "wave size range values of the \"hlsl.wavesize\" function "
+ "attribute");
+ return nullptr;
+ }
+
+ EntryPropsTag Tag =
+ IsWaveSize ? EntryPropsTag::WaveSize : EntryPropsTag::WaveRange;
+ MDVals.emplace_back(ConstantAsMetadata::get(
+ ConstantInt::get(Type::getInt32Ty(Ctx), static_cast<int>(Tag))));
+
+ SmallVector<Metadata *> WaveSizeVals = {ConstantAsMetadata::get(
+ ConstantInt::get(Type::getInt32Ty(Ctx), EP.WaveSizeMin))};
+ if (IsWaveRange) {
+ WaveSizeVals.push_back(ConstantAsMetadata::get(
+ ConstantInt::get(Type::getInt32Ty(Ctx), EP.WaveSizeMax)));
+ WaveSizeVals.push_back(ConstantAsMetadata::get(
+ ConstantInt::get(Type::getInt32Ty(Ctx), EP.WaveSizePref)));
+ }
+
+ MDVals.emplace_back(MDNode::get(Ctx, WaveSizeVals));
+ }
}
}
+
if (MDVals.empty())
return nullptr;
return MDNode::get(Ctx, MDVals);
@@ -236,12 +279,11 @@ static MDTuple *constructEntryMetadata(const Function *EntryFn,
return MDNode::get(Ctx, MDVals);
}
-static MDTuple *emitEntryMD(const EntryProperties &EP, MDTuple *Signatures,
- MDNode *MDResources,
+static MDTuple *emitEntryMD(Module &M, const EntryProperties &EP,
+ MDTuple *Signatures, MDNode *MDResources,
const uint64_t EntryShaderFlags,
- const Triple::EnvironmentType ShaderProfile) {
- MDTuple *Properties =
- getEntryPropAsMetadata(EP, EntryShaderFlags, ShaderProfile);
+ const ModuleMetadataInfo &MMDI) {
+ MDTuple *Properties = getEntryPropAsMetadata(M, EP, EntryShaderFlags, MMDI);
return constructEntryMetadata(EP.Entry, Signatures, MDResources, Properties,
EP.Entry->getContext());
}
@@ -523,10 +565,8 @@ static void translateGlobalMetadata(Module &M, DXILResourceMap &DRM,
Twine(Triple::getEnvironmentTypeName(MMDI.ShaderProfile) +
"'"));
}
-
- EntryFnMDNodes.emplace_back(emitEntryMD(EntryProp, Signatures, ResourceMD,
- EntryShaderFlags,
- MMDI.ShaderProfile));
+ EntryFnMDNodes.emplace_back(emitEntryMD(
+ M, EntryProp, Signatures, ResourceMD, EntryShaderFlags, MMDI));
}
NamedMDNode *EntryPointsNamedMD =
diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp
index 26a8728..48a9085 100644
--- a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp
+++ b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp
@@ -1169,8 +1169,8 @@ void DXILBitcodeWriter::writeModuleInfo() {
// We need to hardcode a triple and datalayout that's compatible with the
// historical DXIL triple and datalayout from DXC.
StringRef Triple = "dxil-ms-dx";
- StringRef DL = "e-m:e-p:32:32-i1:8-i8:8-i16:32-i32:32-i64:64-"
- "f16:32-f32:32-f64:64-n8:16:32:64";
+ StringRef DL = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-"
+ "f16:16-f32:32-f64:64-n8:16:32:64";
writeStringRecord(Stream, bitc::MODULE_CODE_TRIPLE, Triple, 0 /*TODO*/);
writeStringRecord(Stream, bitc::MODULE_CODE_DATALAYOUT, DL, 0 /*TODO*/);
diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.h b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.h
index 8707b08..7cbc092e 100644
--- a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.h
+++ b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.h
@@ -18,9 +18,7 @@
#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/MemoryBufferRef.h"
-#include <map>
#include <memory>
-#include <string>
#include <vector>
namespace llvm {
diff --git a/llvm/lib/Target/DirectX/DirectX.td b/llvm/lib/Target/DirectX/DirectX.td
index 4d1d45b..1717d53 100644
--- a/llvm/lib/Target/DirectX/DirectX.td
+++ b/llvm/lib/Target/DirectX/DirectX.td
@@ -22,6 +22,8 @@ include "DXILStubs.td"
// DirectX Subtarget features.
//===----------------------------------------------------------------------===//
+defm : RemapAllTargetPseudoPointerOperands<DXILClass>;
+
def DirectXInstrInfo : InstrInfo;
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/DirectX/DirectXInstrInfo.cpp b/llvm/lib/Target/DirectX/DirectXInstrInfo.cpp
index bb2efa4..401881d 100644
--- a/llvm/lib/Target/DirectX/DirectXInstrInfo.cpp
+++ b/llvm/lib/Target/DirectX/DirectXInstrInfo.cpp
@@ -19,6 +19,6 @@
using namespace llvm;
DirectXInstrInfo::DirectXInstrInfo(const DirectXSubtarget &STI)
- : DirectXGenInstrInfo(STI) {}
+ : DirectXGenInstrInfo(STI, RI) {}
DirectXInstrInfo::~DirectXInstrInfo() {}
diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
index 84b1a31..fae9cbf 100644
--- a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
+++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
@@ -110,9 +110,9 @@ public:
void addCodeGenPrepare() override {
addPass(createDXILFinalizeLinkageLegacyPass());
addPass(createGlobalDCEPass());
+ addPass(createDXILCBufferAccessLegacyPass());
addPass(createDXILResourceAccessLegacyPass());
addPass(createDXILIntrinsicExpansionLegacyPass());
- addPass(createDXILCBufferAccessLegacyPass());
addPass(createDXILDataScalarizationLegacyPass());
ScalarizerPassOptions DxilScalarOptions;
DxilScalarOptions.ScalarizeLoadStore = true;
@@ -206,7 +206,7 @@ DirectXTargetMachine::getTargetTransformInfo(const Function &F) const {
DirectXTargetLowering::DirectXTargetLowering(const DirectXTargetMachine &TM,
const DirectXSubtarget &STI)
- : TargetLowering(TM) {
+ : TargetLowering(TM, STI) {
addRegisterClass(MVT::i32, &dxil::DXILClassRegClass);
computeRegisterProperties(STI.getRegisterInfo());
}
diff --git a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
index 60dfd96..a755dd5 100644
--- a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
@@ -29,11 +29,12 @@ bool DirectXTTIImpl::isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID,
int OpdIdx) const {
switch (ID) {
case Intrinsic::dx_asdouble:
- case Intrinsic::dx_isinf:
- case Intrinsic::dx_isnan:
case Intrinsic::dx_firstbitlow:
- case Intrinsic::dx_firstbituhigh:
case Intrinsic::dx_firstbitshigh:
+ case Intrinsic::dx_firstbituhigh:
+ case Intrinsic::dx_isinf:
+ case Intrinsic::dx_isnan:
+ case Intrinsic::dx_legacyf16tof32:
return OpdIdx == 0;
default:
return OpdIdx == -1;
@@ -50,6 +51,7 @@ bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable(
case Intrinsic::dx_frac:
case Intrinsic::dx_isinf:
case Intrinsic::dx_isnan:
+ case Intrinsic::dx_legacyf16tof32:
case Intrinsic::dx_rsqrt:
case Intrinsic::dx_saturate:
case Intrinsic::dx_splitdouble:
@@ -62,6 +64,8 @@ bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable(
case Intrinsic::dx_wave_reduce_usum:
case Intrinsic::dx_imad:
case Intrinsic::dx_umad:
+ case Intrinsic::dx_ddx_coarse:
+ case Intrinsic::dx_ddy_coarse:
return true;
default:
return false;