diff options
Diffstat (limited to 'llvm/lib/Target/DirectX')
| -rw-r--r-- | llvm/lib/Target/DirectX/DXContainerGlobals.cpp | 15 | ||||
| -rw-r--r-- | llvm/lib/Target/DirectX/DXIL.td | 27 | ||||
| -rw-r--r-- | llvm/lib/Target/DirectX/DXILCBufferAccess.cpp | 310 | ||||
| -rw-r--r-- | llvm/lib/Target/DirectX/DXILDataScalarization.cpp | 95 | ||||
| -rw-r--r-- | llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp | 4 | ||||
| -rw-r--r-- | llvm/lib/Target/DirectX/DXILOpLowering.cpp | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/DirectX/DXILResourceAccess.cpp | 156 | ||||
| -rw-r--r-- | llvm/lib/Target/DirectX/DXILRootSignature.h | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/DirectX/DXILShaderFlags.cpp | 29 | ||||
| -rw-r--r-- | llvm/lib/Target/DirectX/DXILShaderFlags.h | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp | 66 | ||||
| -rw-r--r-- | llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp | 4 | ||||
| -rw-r--r-- | llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.h | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/DirectX/DirectX.td | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/DirectX/DirectXInstrInfo.cpp | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/DirectX/DirectXTargetMachine.cpp | 4 | ||||
| -rw-r--r-- | llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp | 10 |
17 files changed, 359 insertions, 371 deletions
diff --git a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp index 8ace2d2..95577dd 100644 --- a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp +++ b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp @@ -29,7 +29,6 @@ #include "llvm/TargetParser/Triple.h" #include "llvm/Transforms/Utils/ModuleUtils.h" #include <cstdint> -#include <optional> using namespace llvm; using namespace llvm::dxil; @@ -194,9 +193,10 @@ void DXContainerGlobals::addResourcesForPSV(Module &M, PSVRuntimeInfo &PSV) { dxbc::PSV::v2::ResourceBindInfo BindInfo; BindInfo.Type = Type; BindInfo.LowerBound = Binding.LowerBound; - assert(Binding.Size == UINT32_MAX || - (uint64_t)Binding.LowerBound + Binding.Size - 1 <= UINT32_MAX && - "Resource range is too large"); + assert( + (Binding.Size == UINT32_MAX || + (uint64_t)Binding.LowerBound + Binding.Size - 1 <= UINT32_MAX) && + "Resource range is too large"); BindInfo.UpperBound = (Binding.Size == UINT32_MAX) ? UINT32_MAX : Binding.LowerBound + Binding.Size - 1; @@ -284,6 +284,13 @@ void DXContainerGlobals::addPipelineStateValidationInfo( PSV.BaseData.NumThreadsX = MMI.EntryPropertyVec[0].NumThreadsX; PSV.BaseData.NumThreadsY = MMI.EntryPropertyVec[0].NumThreadsY; PSV.BaseData.NumThreadsZ = MMI.EntryPropertyVec[0].NumThreadsZ; + if (MMI.EntryPropertyVec[0].WaveSizeMin) { + PSV.BaseData.MinimumWaveLaneCount = MMI.EntryPropertyVec[0].WaveSizeMin; + PSV.BaseData.MaximumWaveLaneCount = + MMI.EntryPropertyVec[0].WaveSizeMax + ? MMI.EntryPropertyVec[0].WaveSizeMax + : MMI.EntryPropertyVec[0].WaveSizeMin; + } break; default: break; diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td index 7ae500a..8b286626 100644 --- a/llvm/lib/Target/DirectX/DXIL.td +++ b/llvm/lib/Target/DirectX/DXIL.td @@ -930,6 +930,24 @@ def Discard : DXILOp<82, discard> { let stages = [Stages<DXIL1_0, [pixel]>]; } +def DerivCoarseX : DXILOp<83, unary> { + let Doc = "computes the rate of change per stamp in x direction"; + let intrinsics = [IntrinSelect<int_dx_ddx_coarse>]; + let arguments = [OverloadTy]; + let result = OverloadTy; + let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>]; + let stages = [Stages<DXIL1_0, [library, pixel]>]; +} + +def DerivCoarseY : DXILOp<84, unary> { + let Doc = "computes the rate of change per stamp in y direction"; + let intrinsics = [IntrinSelect<int_dx_ddy_coarse>]; + let arguments = [OverloadTy]; + let result = OverloadTy; + let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>]; + let stages = [Stages<DXIL1_0, [library, pixel]>]; +} + def ThreadId : DXILOp<93, threadId> { let Doc = "Reads the thread ID"; let intrinsics = [IntrinSelect<int_dx_thread_id>]; @@ -1079,6 +1097,15 @@ def WaveActiveOp : DXILOp<119, waveActiveOp> { let attributes = [Attributes<DXIL1_0, []>]; } +def LegacyF16ToF32 : DXILOp<131, legacyF16ToF32> { + let Doc = "returns the float16 stored in the low-half of the uint converted " + "to a float"; + let intrinsics = [IntrinSelect<int_dx_legacyf16tof32>]; + let arguments = [Int32Ty]; + let result = FloatTy; + let stages = [Stages<DXIL1_0, [all_stages]>]; +} + def WaveAllBitCount : DXILOp<135, waveAllOp> { let Doc = "returns the count of bits set to 1 across the wave"; let intrinsics = [IntrinSelect<int_dx_wave_active_countbits>]; diff --git a/llvm/lib/Target/DirectX/DXILCBufferAccess.cpp b/llvm/lib/Target/DirectX/DXILCBufferAccess.cpp index 4427797..5624532 100644 --- a/llvm/lib/Target/DirectX/DXILCBufferAccess.cpp +++ b/llvm/lib/Target/DirectX/DXILCBufferAccess.cpp @@ -8,11 +8,13 @@ #include "DXILCBufferAccess.h" #include "DirectX.h" +#include "llvm/Analysis/DXILResource.h" #include "llvm/Frontend/HLSL/CBuffer.h" #include "llvm/Frontend/HLSL/HLSLResource.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/IntrinsicsDirectX.h" +#include "llvm/IR/ReplaceConstant.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/FormatVariadic.h" @@ -21,297 +23,41 @@ #define DEBUG_TYPE "dxil-cbuffer-access" using namespace llvm; -namespace { -/// Helper for building a `load.cbufferrow` intrinsic given a simple type. -struct CBufferRowIntrin { - Intrinsic::ID IID; - Type *RetTy; - unsigned int EltSize; - unsigned int NumElts; - - CBufferRowIntrin(const DataLayout &DL, Type *Ty) { - assert(Ty == Ty->getScalarType() && "Expected scalar type"); - - switch (DL.getTypeSizeInBits(Ty)) { - case 16: - IID = Intrinsic::dx_resource_load_cbufferrow_8; - RetTy = StructType::get(Ty, Ty, Ty, Ty, Ty, Ty, Ty, Ty); - EltSize = 2; - NumElts = 8; - break; - case 32: - IID = Intrinsic::dx_resource_load_cbufferrow_4; - RetTy = StructType::get(Ty, Ty, Ty, Ty); - EltSize = 4; - NumElts = 4; - break; - case 64: - IID = Intrinsic::dx_resource_load_cbufferrow_2; - RetTy = StructType::get(Ty, Ty); - EltSize = 8; - NumElts = 2; - break; - default: - llvm_unreachable("Only 16, 32, and 64 bit types supported"); - } - } -}; - -// Helper for creating CBuffer handles and loading data from them -struct CBufferResource { - GlobalVariable *GVHandle; - GlobalVariable *Member; - size_t MemberOffset; - - LoadInst *Handle; - - CBufferResource(GlobalVariable *GVHandle, GlobalVariable *Member, - size_t MemberOffset) - : GVHandle(GVHandle), Member(Member), MemberOffset(MemberOffset) {} - - const DataLayout &getDataLayout() { return GVHandle->getDataLayout(); } - Type *getValueType() { return Member->getValueType(); } - iterator_range<ConstantDataSequential::user_iterator> users() { - return Member->users(); - } - - /// Get the byte offset of a Pointer-typed Value * `Val` relative to Member. - /// `Val` can either be Member itself, or a GEP of a constant offset from - /// Member - size_t getOffsetForCBufferGEP(Value *Val) { - assert(isa<PointerType>(Val->getType()) && - "Expected a pointer-typed value"); - - if (Val == Member) - return 0; - - if (auto *GEP = dyn_cast<GEPOperator>(Val)) { - // Since we should always have a constant offset, we should only ever have - // a single GEP of indirection from the Global. - assert(GEP->getPointerOperand() == Member && - "Indirect access to resource handle"); - - const DataLayout &DL = getDataLayout(); - APInt ConstantOffset(DL.getIndexTypeSizeInBits(GEP->getType()), 0); - bool Success = GEP->accumulateConstantOffset(DL, ConstantOffset); - (void)Success; - assert(Success && "Offsets into cbuffer globals must be constant"); - - if (auto *ATy = dyn_cast<ArrayType>(Member->getValueType())) - ConstantOffset = - hlsl::translateCBufArrayOffset(DL, ConstantOffset, ATy); - - return ConstantOffset.getZExtValue(); - } - - llvm_unreachable("Expected Val to be a GlobalVariable or GEP"); - } - - /// Create a handle for this cbuffer resource using the IRBuilder `Builder` - /// and sets the handle as the current one to use for subsequent calls to - /// `loadValue` - void createAndSetCurrentHandle(IRBuilder<> &Builder) { - Handle = Builder.CreateLoad(GVHandle->getValueType(), GVHandle, - GVHandle->getName()); +static void replaceUsersOfGlobal(GlobalVariable *Global, + GlobalVariable *HandleGV, size_t Offset) { + for (Use &U : make_early_inc_range(Global->uses())) { + auto UseInst = dyn_cast<Instruction>(U.getUser()); + // TODO: Constants? Metadata? + assert(UseInst && "Non-instruction use of cbuffer"); + + IRBuilder<> Builder(UseInst); + LoadInst *Handle = Builder.CreateLoad(HandleGV->getValueType(), HandleGV, + HandleGV->getName()); + Value *Ptr = Builder.CreateIntrinsic( + Global->getType(), Intrinsic::dx_resource_getpointer, + ArrayRef<Value *>{Handle, + ConstantInt::get(Builder.getInt32Ty(), Offset)}); + U.set(Ptr); } - /// Load a value of type `Ty` at offset `Offset` using the handle from the - /// last call to `createAndSetCurrentHandle` - Value *loadValue(IRBuilder<> &Builder, Type *Ty, size_t Offset, - const Twine &Name = "") { - assert(Handle && - "Expected a handle for this cbuffer global resource to be created " - "before loading a value from it"); - const DataLayout &DL = getDataLayout(); - - size_t TargetOffset = MemberOffset + Offset; - CBufferRowIntrin Intrin(DL, Ty->getScalarType()); - // The cbuffer consists of some number of 16-byte rows. - unsigned int CurrentRow = TargetOffset / hlsl::CBufferRowSizeInBytes; - unsigned int CurrentIndex = - (TargetOffset % hlsl::CBufferRowSizeInBytes) / Intrin.EltSize; - - auto *CBufLoad = Builder.CreateIntrinsic( - Intrin.RetTy, Intrin.IID, - {Handle, ConstantInt::get(Builder.getInt32Ty(), CurrentRow)}, nullptr, - Name + ".load"); - auto *Elt = Builder.CreateExtractValue(CBufLoad, {CurrentIndex++}, - Name + ".extract"); - - Value *Result = nullptr; - unsigned int Remaining = - ((DL.getTypeSizeInBits(Ty) / 8) / Intrin.EltSize) - 1; - - if (Remaining == 0) { - // We only have a single element, so we're done. - Result = Elt; - - // However, if we loaded a <1 x T>, then we need to adjust the type here. - if (auto *VT = dyn_cast<FixedVectorType>(Ty)) { - assert(VT->getNumElements() == 1 && - "Can't have multiple elements here"); - Result = Builder.CreateInsertElement(PoisonValue::get(VT), Result, - Builder.getInt32(0), Name); - } - return Result; - } - - // Walk each element and extract it, wrapping to new rows as needed. - SmallVector<Value *> Extracts{Elt}; - while (Remaining--) { - CurrentIndex %= Intrin.NumElts; - - if (CurrentIndex == 0) - CBufLoad = Builder.CreateIntrinsic( - Intrin.RetTy, Intrin.IID, - {Handle, ConstantInt::get(Builder.getInt32Ty(), ++CurrentRow)}, - nullptr, Name + ".load"); - - Extracts.push_back(Builder.CreateExtractValue(CBufLoad, {CurrentIndex++}, - Name + ".extract")); - } - - // Finally, we build up the original loaded value. - Result = PoisonValue::get(Ty); - for (int I = 0, E = Extracts.size(); I < E; ++I) - Result = - Builder.CreateInsertElement(Result, Extracts[I], Builder.getInt32(I), - Name + formatv(".upto{}", I)); - return Result; - } -}; - -} // namespace - -/// Replace load via cbuffer global with a load from the cbuffer handle itself. -static void replaceLoad(LoadInst *LI, CBufferResource &CBR, - SmallVectorImpl<WeakTrackingVH> &DeadInsts) { - size_t Offset = CBR.getOffsetForCBufferGEP(LI->getPointerOperand()); - IRBuilder<> Builder(LI); - CBR.createAndSetCurrentHandle(Builder); - Value *Result = CBR.loadValue(Builder, LI->getType(), Offset, LI->getName()); - LI->replaceAllUsesWith(Result); - DeadInsts.push_back(LI); -} - -/// This function recursively copies N array elements from the cbuffer resource -/// CBR to the MemCpy Destination. Recursion is used to unravel multidimensional -/// arrays into a sequence of scalar/vector extracts and stores. -static void copyArrayElemsForMemCpy(IRBuilder<> &Builder, MemCpyInst *MCI, - CBufferResource &CBR, ArrayType *ArrTy, - size_t ArrOffset, size_t N, - const Twine &Name = "") { - const DataLayout &DL = MCI->getDataLayout(); - Type *ElemTy = ArrTy->getElementType(); - size_t ElemTySize = DL.getTypeAllocSize(ElemTy); - for (unsigned I = 0; I < N; ++I) { - size_t Offset = ArrOffset + I * ElemTySize; - - // Recursively copy nested arrays - if (ArrayType *ElemArrTy = dyn_cast<ArrayType>(ElemTy)) { - copyArrayElemsForMemCpy(Builder, MCI, CBR, ElemArrTy, Offset, - ElemArrTy->getNumElements(), Name); - continue; - } - - // Load CBuffer value and store it in Dest - APInt CBufArrayOffset( - DL.getIndexTypeSizeInBits(MCI->getSource()->getType()), Offset); - CBufArrayOffset = - hlsl::translateCBufArrayOffset(DL, CBufArrayOffset, ArrTy); - Value *CBufferVal = - CBR.loadValue(Builder, ElemTy, CBufArrayOffset.getZExtValue(), Name); - Value *GEP = - Builder.CreateInBoundsGEP(Builder.getInt8Ty(), MCI->getDest(), - {Builder.getInt32(Offset)}, Name + ".dest"); - Builder.CreateStore(CBufferVal, GEP, MCI->isVolatile()); - } -} - -/// Replace memcpy from a cbuffer global with a memcpy from the cbuffer handle -/// itself. Assumes the cbuffer global is an array, and the length of bytes to -/// copy is divisible by array element allocation size. -/// The memcpy source must also be a direct cbuffer global reference, not a GEP. -static void replaceMemCpy(MemCpyInst *MCI, CBufferResource &CBR) { - - ArrayType *ArrTy = dyn_cast<ArrayType>(CBR.getValueType()); - assert(ArrTy && "MemCpy lowering is only supported for array types"); - - // This assumption vastly simplifies the implementation - if (MCI->getSource() != CBR.Member) - reportFatalUsageError( - "Expected MemCpy source to be a cbuffer global variable"); - - ConstantInt *Length = dyn_cast<ConstantInt>(MCI->getLength()); - uint64_t ByteLength = Length->getZExtValue(); - - // If length to copy is zero, no memcpy is needed - if (ByteLength == 0) { - MCI->eraseFromParent(); - return; - } - - const DataLayout &DL = CBR.getDataLayout(); - - Type *ElemTy = ArrTy->getElementType(); - size_t ElemSize = DL.getTypeAllocSize(ElemTy); - assert(ByteLength % ElemSize == 0 && - "Length of bytes to MemCpy must be divisible by allocation size of " - "source/destination array elements"); - size_t ElemsToCpy = ByteLength / ElemSize; - - IRBuilder<> Builder(MCI); - CBR.createAndSetCurrentHandle(Builder); - - copyArrayElemsForMemCpy(Builder, MCI, CBR, ArrTy, 0, ElemsToCpy, - "memcpy." + MCI->getDest()->getName() + "." + - MCI->getSource()->getName()); - - MCI->eraseFromParent(); -} - -static void replaceAccessesWithHandle(CBufferResource &CBR) { - SmallVector<WeakTrackingVH> DeadInsts; - - SmallVector<User *> ToProcess{CBR.users()}; - while (!ToProcess.empty()) { - User *Cur = ToProcess.pop_back_val(); - - // If we have a load instruction, replace the access. - if (auto *LI = dyn_cast<LoadInst>(Cur)) { - replaceLoad(LI, CBR, DeadInsts); - continue; - } - - // If we have a memcpy instruction, replace it with multiple accesses and - // subsequent stores to the destination - if (auto *MCI = dyn_cast<MemCpyInst>(Cur)) { - replaceMemCpy(MCI, CBR); - continue; - } - - // Otherwise, walk users looking for a load... - if (isa<GetElementPtrInst>(Cur) || isa<GEPOperator>(Cur)) { - ToProcess.append(Cur->user_begin(), Cur->user_end()); - continue; - } - - llvm_unreachable("Unexpected user of Global"); - } - RecursivelyDeleteTriviallyDeadInstructions(DeadInsts); + Global->removeFromParent(); } static bool replaceCBufferAccesses(Module &M) { - std::optional<hlsl::CBufferMetadata> CBufMD = hlsl::CBufferMetadata::get(M); + std::optional<hlsl::CBufferMetadata> CBufMD = hlsl::CBufferMetadata::get( + M, [](Type *Ty) { return isa<llvm::dxil::PaddingExtType>(Ty); }); if (!CBufMD) return false; + SmallVector<Constant *> CBufferGlobals; + for (const hlsl::CBufferMapping &Mapping : *CBufMD) + for (const hlsl::CBufferMember &Member : Mapping.Members) + CBufferGlobals.push_back(Member.GV); + convertUsersOfConstantsToInstructions(CBufferGlobals); + for (const hlsl::CBufferMapping &Mapping : *CBufMD) - for (const hlsl::CBufferMember &Member : Mapping.Members) { - CBufferResource CBR(Mapping.Handle, Member.GV, Member.Offset); - replaceAccessesWithHandle(CBR); - Member.GV->removeFromParent(); - } + for (const hlsl::CBufferMember &Member : Mapping.Members) + replaceUsersOfGlobal(Member.GV, Mapping.Handle, Member.Offset); CBufMD->eraseFromModule(); return true; diff --git a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp index d507d71..5f18c37 100644 --- a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp +++ b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp @@ -29,20 +29,6 @@ static const int MaxVecSize = 4; using namespace llvm; -// Recursively creates an array-like version of a given vector type. -static Type *equivalentArrayTypeFromVector(Type *T) { - if (auto *VecTy = dyn_cast<VectorType>(T)) - return ArrayType::get(VecTy->getElementType(), - dyn_cast<FixedVectorType>(VecTy)->getNumElements()); - if (auto *ArrayTy = dyn_cast<ArrayType>(T)) { - Type *NewElementType = - equivalentArrayTypeFromVector(ArrayTy->getElementType()); - return ArrayType::get(NewElementType, ArrayTy->getNumElements()); - } - // If it's not a vector or array, return the original type. - return T; -} - class DXILDataScalarizationLegacy : public ModulePass { public: @@ -121,12 +107,25 @@ DataScalarizerVisitor::lookupReplacementGlobal(Value *CurrOperand) { static bool isVectorOrArrayOfVectors(Type *T) { if (isa<VectorType>(T)) return true; - if (ArrayType *ArrType = dyn_cast<ArrayType>(T)) - return isa<VectorType>(ArrType->getElementType()) || - isVectorOrArrayOfVectors(ArrType->getElementType()); + if (ArrayType *ArrayTy = dyn_cast<ArrayType>(T)) + return isVectorOrArrayOfVectors(ArrayTy->getElementType()); return false; } +// Recursively creates an array-like version of a given vector type. +static Type *equivalentArrayTypeFromVector(Type *T) { + if (auto *VecTy = dyn_cast<VectorType>(T)) + return ArrayType::get(VecTy->getElementType(), + dyn_cast<FixedVectorType>(VecTy)->getNumElements()); + if (auto *ArrayTy = dyn_cast<ArrayType>(T)) { + Type *NewElementType = + equivalentArrayTypeFromVector(ArrayTy->getElementType()); + return ArrayType::get(NewElementType, ArrayTy->getNumElements()); + } + // If it's not a vector or array, return the original type. + return T; +} + bool DataScalarizerVisitor::visitAllocaInst(AllocaInst &AI) { Type *AllocatedType = AI.getAllocatedType(); if (!isVectorOrArrayOfVectors(AllocatedType)) @@ -135,7 +134,7 @@ bool DataScalarizerVisitor::visitAllocaInst(AllocaInst &AI) { IRBuilder<> Builder(&AI); Type *NewType = equivalentArrayTypeFromVector(AllocatedType); AllocaInst *ArrAlloca = - Builder.CreateAlloca(NewType, nullptr, AI.getName() + ".scalarize"); + Builder.CreateAlloca(NewType, nullptr, AI.getName() + ".scalarized"); ArrAlloca->setAlignment(AI.getAlign()); AI.replaceAllUsesWith(ArrAlloca); AI.eraseFromParent(); @@ -303,42 +302,44 @@ bool DataScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) { bool DataScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) { GEPOperator *GOp = cast<GEPOperator>(&GEPI); Value *PtrOperand = GOp->getPointerOperand(); - Type *NewGEPType = GOp->getSourceElementType(); - bool NeedsTransform = false; - - // Unwrap GEP ConstantExprs to find the base operand and element type - while (auto *CE = dyn_cast<ConstantExpr>(PtrOperand)) { - if (auto *GEPCE = dyn_cast<GEPOperator>(CE)) { - GOp = GEPCE; - PtrOperand = GEPCE->getPointerOperand(); - NewGEPType = GEPCE->getSourceElementType(); - } else - break; + Type *GEPType = GOp->getSourceElementType(); + + // Replace a GEP ConstantExpr pointer operand with a GEP instruction so that + // it can be visited + if (auto *PtrOpGEPCE = dyn_cast<ConstantExpr>(PtrOperand); + PtrOpGEPCE && PtrOpGEPCE->getOpcode() == Instruction::GetElementPtr) { + GetElementPtrInst *OldGEPI = + cast<GetElementPtrInst>(PtrOpGEPCE->getAsInstruction()); + OldGEPI->insertBefore(GEPI.getIterator()); + + IRBuilder<> Builder(&GEPI); + SmallVector<Value *> Indices(GEPI.indices()); + Value *NewGEP = + Builder.CreateGEP(GEPI.getSourceElementType(), OldGEPI, Indices, + GEPI.getName(), GEPI.getNoWrapFlags()); + assert(isa<GetElementPtrInst>(NewGEP) && + "Expected newly-created GEP to be an instruction"); + GetElementPtrInst *NewGEPI = cast<GetElementPtrInst>(NewGEP); + + GEPI.replaceAllUsesWith(NewGEPI); + GEPI.eraseFromParent(); + visitGetElementPtrInst(*OldGEPI); + visitGetElementPtrInst(*NewGEPI); + return true; } - if (GlobalVariable *NewGlobal = lookupReplacementGlobal(PtrOperand)) { - NewGEPType = NewGlobal->getValueType(); - PtrOperand = NewGlobal; - NeedsTransform = true; - } else if (AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrOperand)) { - Type *AllocatedType = Alloca->getAllocatedType(); - if (isa<ArrayType>(AllocatedType) && - AllocatedType != GOp->getResultElementType()) { - NewGEPType = AllocatedType; - NeedsTransform = true; - } - } + Type *NewGEPType = equivalentArrayTypeFromVector(GEPType); + Value *NewPtrOperand = PtrOperand; + if (GlobalVariable *NewGlobal = lookupReplacementGlobal(PtrOperand)) + NewPtrOperand = NewGlobal; + bool NeedsTransform = NewPtrOperand != PtrOperand || NewGEPType != GEPType; if (!NeedsTransform) return false; - // Keep scalar GEPs scalar; dxil-flatten-arrays will do flattening later - if (!isa<ArrayType>(GOp->getSourceElementType())) - NewGEPType = GOp->getSourceElementType(); - IRBuilder<> Builder(&GEPI); - SmallVector<Value *, MaxVecSize> Indices(GOp->indices()); - Value *NewGEP = Builder.CreateGEP(NewGEPType, PtrOperand, Indices, + SmallVector<Value *, MaxVecSize> Indices(GOp->idx_begin(), GOp->idx_end()); + Value *NewGEP = Builder.CreateGEP(NewGEPType, NewPtrOperand, Indices, GOp->getName(), GOp->getNoWrapFlags()); GOp->replaceAllUsesWith(NewGEP); diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp index ebb7c26..e0d2dbd 100644 --- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp +++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp @@ -197,6 +197,7 @@ static Value *expand16BitIsNormal(CallInst *Orig) { static bool isIntrinsicExpansion(Function &F) { switch (F.getIntrinsicID()) { + case Intrinsic::assume: case Intrinsic::abs: case Intrinsic::atan2: case Intrinsic::exp: @@ -988,6 +989,9 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) { case Intrinsic::abs: Result = expandAbs(Orig); break; + case Intrinsic::assume: + Orig->eraseFromParent(); + return true; case Intrinsic::atan2: Result = expandAtan2Intrinsic(Orig); break; diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp index 8720460..e46a393 100644 --- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp +++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp @@ -904,8 +904,6 @@ public: case Intrinsic::dx_resource_casthandle: // NOTE: llvm.dbg.value is supported as is in DXIL. case Intrinsic::dbg_value: - // NOTE: llvm.assume is supported as is in DXIL. - case Intrinsic::assume: case Intrinsic::not_intrinsic: if (F.use_empty()) F.eraseFromParent(); diff --git a/llvm/lib/Target/DirectX/DXILResourceAccess.cpp b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp index 6579d34..057d87b 100644 --- a/llvm/lib/Target/DirectX/DXILResourceAccess.cpp +++ b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp @@ -10,6 +10,7 @@ #include "DirectX.h" #include "llvm/ADT/SetVector.h" #include "llvm/Analysis/DXILResource.h" +#include "llvm/Frontend/HLSL/HLSLResource.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" @@ -20,6 +21,7 @@ #include "llvm/IR/IntrinsicsDirectX.h" #include "llvm/IR/User.h" #include "llvm/InitializePasses.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/Transforms/Utils/ValueMapper.h" #define DEBUG_TYPE "dxil-resource-access" @@ -44,16 +46,28 @@ static Value *calculateGEPOffset(GetElementPtrInst *GEP, Value *PrevOffset, APInt ConstantOffset(DL.getIndexTypeSizeInBits(GEP->getType()), 0); if (GEP->accumulateConstantOffset(DL, ConstantOffset)) { APInt Scaled = ConstantOffset.udiv(ScalarSize); - return ConstantInt::get(Type::getInt32Ty(GEP->getContext()), Scaled); + return ConstantInt::get(DL.getIndexType(GEP->getType()), Scaled); } - auto IndexIt = GEP->idx_begin(); - assert(cast<ConstantInt>(IndexIt)->getZExtValue() == 0 && - "GEP is not indexing through pointer"); - ++IndexIt; - Value *Offset = *IndexIt; - assert(++IndexIt == GEP->idx_end() && "Too many indices in GEP"); - return Offset; + unsigned NumIndices = GEP->getNumIndices(); + + // If we have a single index we're indexing into a top level array. This + // generally only happens with cbuffers. + if (NumIndices == 1) + return *GEP->idx_begin(); + + // If we have two indices, this should be a simple access through a pointer. + if (NumIndices == 2) { + auto IndexIt = GEP->idx_begin(); + assert(cast<ConstantInt>(IndexIt)->getZExtValue() == 0 && + "GEP is not indexing through pointer"); + ++IndexIt; + Value *Offset = *IndexIt; + assert(++IndexIt == GEP->idx_end() && "Too many indices in GEP"); + return Offset; + } + + llvm_unreachable("Unhandled GEP structure for resource access"); } static void createTypedBufferStore(IntrinsicInst *II, StoreInst *SI, @@ -171,6 +185,127 @@ static void createRawLoad(IntrinsicInst *II, LoadInst *LI, Value *Offset) { LI->replaceAllUsesWith(V); } +namespace { +/// Helper for building a `load.cbufferrow` intrinsic given a simple type. +struct CBufferRowIntrin { + Intrinsic::ID IID; + Type *RetTy; + unsigned int EltSize; + unsigned int NumElts; + + CBufferRowIntrin(const DataLayout &DL, Type *Ty) { + assert(Ty == Ty->getScalarType() && "Expected scalar type"); + + switch (DL.getTypeSizeInBits(Ty)) { + case 16: + IID = Intrinsic::dx_resource_load_cbufferrow_8; + RetTy = StructType::get(Ty, Ty, Ty, Ty, Ty, Ty, Ty, Ty); + EltSize = 2; + NumElts = 8; + break; + case 32: + IID = Intrinsic::dx_resource_load_cbufferrow_4; + RetTy = StructType::get(Ty, Ty, Ty, Ty); + EltSize = 4; + NumElts = 4; + break; + case 64: + IID = Intrinsic::dx_resource_load_cbufferrow_2; + RetTy = StructType::get(Ty, Ty); + EltSize = 8; + NumElts = 2; + break; + default: + llvm_unreachable("Only 16, 32, and 64 bit types supported"); + } + } +}; +} // namespace + +static void createCBufferLoad(IntrinsicInst *II, LoadInst *LI, Value *Offset, + dxil::ResourceTypeInfo &RTI) { + const DataLayout &DL = LI->getDataLayout(); + + Type *Ty = LI->getType(); + assert(!isa<StructType>(Ty) && "Structs not handled yet"); + CBufferRowIntrin Intrin(DL, Ty->getScalarType()); + + StringRef Name = LI->getName(); + Value *Handle = II->getOperand(0); + + IRBuilder<> Builder(LI); + + ConstantInt *GlobalOffset = dyn_cast<ConstantInt>(II->getOperand(1)); + assert(GlobalOffset && "CBuffer getpointer index must be constant"); + + unsigned int FixedOffset = GlobalOffset->getZExtValue(); + // If we have a further constant offset we can just fold it in to the fixed + // offset. + if (auto *ConstOffset = dyn_cast_if_present<ConstantInt>(Offset)) { + FixedOffset += ConstOffset->getZExtValue(); + Offset = nullptr; + } + + Value *CurrentRow = ConstantInt::get( + Builder.getInt32Ty(), FixedOffset / hlsl::CBufferRowSizeInBytes); + unsigned int CurrentIndex = + (FixedOffset % hlsl::CBufferRowSizeInBytes) / Intrin.EltSize; + + assert(!(CurrentIndex && Offset) && + "Dynamic indexing into elements of cbuffer rows is not supported"); + // At this point if we have a non-constant offset it has to be an array + // offset, so we can assume that it's a multiple of the row size. + if (Offset) + CurrentRow = FixedOffset ? Builder.CreateAdd(CurrentRow, Offset) : Offset; + + auto *CBufLoad = Builder.CreateIntrinsic( + Intrin.RetTy, Intrin.IID, {Handle, CurrentRow}, nullptr, Name + ".load"); + auto *Elt = + Builder.CreateExtractValue(CBufLoad, {CurrentIndex++}, Name + ".extract"); + + // At this point we've loaded the first scalar of our result, but our original + // type may have been a vector. + unsigned int Remaining = + ((DL.getTypeSizeInBits(Ty) / 8) / Intrin.EltSize) - 1; + if (Remaining == 0) { + // We only have a single element, so we're done. + Value *Result = Elt; + + // However, if we loaded a <1 x T>, then we need to adjust the type. + if (auto *VT = dyn_cast<FixedVectorType>(Ty)) { + assert(VT->getNumElements() == 1 && "Can't have multiple elements here"); + Result = Builder.CreateInsertElement(PoisonValue::get(VT), Result, + Builder.getInt32(0), Name); + } + LI->replaceAllUsesWith(Result); + return; + } + + // Walk each element and extract it, wrapping to new rows as needed. + SmallVector<Value *> Extracts{Elt}; + while (Remaining--) { + CurrentIndex %= Intrin.NumElts; + + if (CurrentIndex == 0) { + CurrentRow = Builder.CreateAdd(CurrentRow, + ConstantInt::get(Builder.getInt32Ty(), 1)); + CBufLoad = Builder.CreateIntrinsic(Intrin.RetTy, Intrin.IID, + {Handle, CurrentRow}, nullptr, + Name + ".load"); + } + + Extracts.push_back(Builder.CreateExtractValue(CBufLoad, {CurrentIndex++}, + Name + ".extract")); + } + + // Finally, we build up the original loaded value. + Value *Result = PoisonValue::get(Ty); + for (int I = 0, E = Extracts.size(); I < E; ++I) + Result = Builder.CreateInsertElement( + Result, Extracts[I], Builder.getInt32(I), Name + formatv(".upto{}", I)); + LI->replaceAllUsesWith(Result); +} + static void createLoadIntrinsic(IntrinsicInst *II, LoadInst *LI, Value *Offset, dxil::ResourceTypeInfo &RTI) { switch (RTI.getResourceKind()) { @@ -179,6 +314,8 @@ static void createLoadIntrinsic(IntrinsicInst *II, LoadInst *LI, Value *Offset, case dxil::ResourceKind::RawBuffer: case dxil::ResourceKind::StructuredBuffer: return createRawLoad(II, LI, Offset); + case dxil::ResourceKind::CBuffer: + return createCBufferLoad(II, LI, Offset, RTI); case dxil::ResourceKind::Texture1D: case dxil::ResourceKind::Texture2D: case dxil::ResourceKind::Texture2DMS: @@ -190,9 +327,8 @@ static void createLoadIntrinsic(IntrinsicInst *II, LoadInst *LI, Value *Offset, case dxil::ResourceKind::TextureCubeArray: case dxil::ResourceKind::FeedbackTexture2D: case dxil::ResourceKind::FeedbackTexture2DArray: - case dxil::ResourceKind::CBuffer: case dxil::ResourceKind::TBuffer: - // TODO: handle these + reportFatalUsageError("Load not yet implemented for resource type"); return; case dxil::ResourceKind::Sampler: case dxil::ResourceKind::RTAccelerationStructure: diff --git a/llvm/lib/Target/DirectX/DXILRootSignature.h b/llvm/lib/Target/DirectX/DXILRootSignature.h index b990b6c..ec82aa9 100644 --- a/llvm/lib/Target/DirectX/DXILRootSignature.h +++ b/llvm/lib/Target/DirectX/DXILRootSignature.h @@ -21,7 +21,6 @@ #include "llvm/IR/PassManager.h" #include "llvm/MC/DXContainerRootSignature.h" #include "llvm/Pass.h" -#include <optional> namespace llvm { namespace dxil { diff --git a/llvm/lib/Target/DirectX/DXILShaderFlags.cpp b/llvm/lib/Target/DirectX/DXILShaderFlags.cpp index ce6e812..e0049dc 100644 --- a/llvm/lib/Target/DirectX/DXILShaderFlags.cpp +++ b/llvm/lib/Target/DirectX/DXILShaderFlags.cpp @@ -100,6 +100,26 @@ static bool checkWaveOps(Intrinsic::ID IID) { } } +// Checks to see if the status bit from a load with status +// instruction is ever extracted. If it is, the module needs +// to have the TiledResources shader flag set. +bool checkIfStatusIsExtracted(const IntrinsicInst &II) { + [[maybe_unused]] Intrinsic::ID IID = II.getIntrinsicID(); + assert(IID == Intrinsic::dx_resource_load_typedbuffer || + IID == Intrinsic::dx_resource_load_rawbuffer && + "unexpected intrinsic ID"); + for (const User *U : II.users()) { + if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(U)) { + // Resource load operations return a {result, status} pair. + // Check if we extract the status + if (EVI->getNumIndices() == 1 && EVI->getIndices()[0] == 1) + return true; + } + } + + return false; +} + /// Update the shader flags mask based on the given instruction. /// \param CSF Shader flags mask to update. /// \param I Instruction to check. @@ -164,7 +184,7 @@ void ModuleShaderFlags::updateFunctionFlags(ComputedShaderFlags &CSF, } } - if (auto *II = dyn_cast<IntrinsicInst>(&I)) { + if (const auto *II = dyn_cast<IntrinsicInst>(&I)) { switch (II->getIntrinsicID()) { default: break; @@ -192,6 +212,13 @@ void ModuleShaderFlags::updateFunctionFlags(ComputedShaderFlags &CSF, DRTM[cast<TargetExtType>(II->getArgOperand(0)->getType())]; if (RTI.isTyped()) CSF.TypedUAVLoadAdditionalFormats |= RTI.getTyped().ElementCount > 1; + if (!CSF.TiledResources && checkIfStatusIsExtracted(*II)) + CSF.TiledResources = true; + break; + } + case Intrinsic::dx_resource_load_rawbuffer: { + if (!CSF.TiledResources && checkIfStatusIsExtracted(*II)) + CSF.TiledResources = true; break; } } diff --git a/llvm/lib/Target/DirectX/DXILShaderFlags.h b/llvm/lib/Target/DirectX/DXILShaderFlags.h index f94f799..a082057 100644 --- a/llvm/lib/Target/DirectX/DXILShaderFlags.h +++ b/llvm/lib/Target/DirectX/DXILShaderFlags.h @@ -22,7 +22,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include <cstdint> -#include <memory> namespace llvm { class Module; diff --git a/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp b/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp index cf8b833..e1a472f 100644 --- a/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp +++ b/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp @@ -82,6 +82,7 @@ enum class EntryPropsTag { ASStateTag, WaveSize, EntryRootSig, + WaveRange = 23, }; } // namespace @@ -177,14 +178,15 @@ getTagValueAsMetadata(EntryPropsTag Tag, uint64_t Value, LLVMContext &Ctx) { case EntryPropsTag::ASStateTag: case EntryPropsTag::WaveSize: case EntryPropsTag::EntryRootSig: + case EntryPropsTag::WaveRange: llvm_unreachable("NYI: Unhandled entry property tag"); } return MDVals; } -static MDTuple * -getEntryPropAsMetadata(const EntryProperties &EP, uint64_t EntryShaderFlags, - const Triple::EnvironmentType ShaderProfile) { +static MDTuple *getEntryPropAsMetadata(Module &M, const EntryProperties &EP, + uint64_t EntryShaderFlags, + const ModuleMetadataInfo &MMDI) { SmallVector<Metadata *> MDVals; LLVMContext &Ctx = EP.Entry->getContext(); if (EntryShaderFlags != 0) @@ -195,12 +197,13 @@ getEntryPropAsMetadata(const EntryProperties &EP, uint64_t EntryShaderFlags, // FIXME: support more props. // See https://github.com/llvm/llvm-project/issues/57948. // Add shader kind for lib entries. - if (ShaderProfile == Triple::EnvironmentType::Library && + if (MMDI.ShaderProfile == Triple::EnvironmentType::Library && EP.ShaderStage != Triple::EnvironmentType::Library) MDVals.append(getTagValueAsMetadata(EntryPropsTag::ShaderKind, getShaderStage(EP.ShaderStage), Ctx)); if (EP.ShaderStage == Triple::EnvironmentType::Compute) { + // Handle mandatory "hlsl.numthreads" MDVals.emplace_back(ConstantAsMetadata::get(ConstantInt::get( Type::getInt32Ty(Ctx), static_cast<int>(EntryPropsTag::NumThreads)))); Metadata *NumThreadVals[] = {ConstantAsMetadata::get(ConstantInt::get( @@ -210,8 +213,48 @@ getEntryPropAsMetadata(const EntryProperties &EP, uint64_t EntryShaderFlags, ConstantAsMetadata::get(ConstantInt::get( Type::getInt32Ty(Ctx), EP.NumThreadsZ))}; MDVals.emplace_back(MDNode::get(Ctx, NumThreadVals)); + + // Handle optional "hlsl.wavesize". The fields are optionally represented + // if they are non-zero. + if (EP.WaveSizeMin != 0) { + bool IsWaveRange = VersionTuple(6, 8) <= MMDI.ShaderModelVersion; + bool IsWaveSize = + !IsWaveRange && VersionTuple(6, 6) <= MMDI.ShaderModelVersion; + + if (!IsWaveRange && !IsWaveSize) { + reportError(M, "Shader model 6.6 or greater is required to specify " + "the \"hlsl.wavesize\" function attribute"); + return nullptr; + } + + // A range is being specified if EP.WaveSizeMax != 0 + if (EP.WaveSizeMax && !IsWaveRange) { + reportError( + M, "Shader model 6.8 or greater is required to specify " + "wave size range values of the \"hlsl.wavesize\" function " + "attribute"); + return nullptr; + } + + EntryPropsTag Tag = + IsWaveSize ? EntryPropsTag::WaveSize : EntryPropsTag::WaveRange; + MDVals.emplace_back(ConstantAsMetadata::get( + ConstantInt::get(Type::getInt32Ty(Ctx), static_cast<int>(Tag)))); + + SmallVector<Metadata *> WaveSizeVals = {ConstantAsMetadata::get( + ConstantInt::get(Type::getInt32Ty(Ctx), EP.WaveSizeMin))}; + if (IsWaveRange) { + WaveSizeVals.push_back(ConstantAsMetadata::get( + ConstantInt::get(Type::getInt32Ty(Ctx), EP.WaveSizeMax))); + WaveSizeVals.push_back(ConstantAsMetadata::get( + ConstantInt::get(Type::getInt32Ty(Ctx), EP.WaveSizePref))); + } + + MDVals.emplace_back(MDNode::get(Ctx, WaveSizeVals)); + } } } + if (MDVals.empty()) return nullptr; return MDNode::get(Ctx, MDVals); @@ -236,12 +279,11 @@ static MDTuple *constructEntryMetadata(const Function *EntryFn, return MDNode::get(Ctx, MDVals); } -static MDTuple *emitEntryMD(const EntryProperties &EP, MDTuple *Signatures, - MDNode *MDResources, +static MDTuple *emitEntryMD(Module &M, const EntryProperties &EP, + MDTuple *Signatures, MDNode *MDResources, const uint64_t EntryShaderFlags, - const Triple::EnvironmentType ShaderProfile) { - MDTuple *Properties = - getEntryPropAsMetadata(EP, EntryShaderFlags, ShaderProfile); + const ModuleMetadataInfo &MMDI) { + MDTuple *Properties = getEntryPropAsMetadata(M, EP, EntryShaderFlags, MMDI); return constructEntryMetadata(EP.Entry, Signatures, MDResources, Properties, EP.Entry->getContext()); } @@ -523,10 +565,8 @@ static void translateGlobalMetadata(Module &M, DXILResourceMap &DRM, Twine(Triple::getEnvironmentTypeName(MMDI.ShaderProfile) + "'")); } - - EntryFnMDNodes.emplace_back(emitEntryMD(EntryProp, Signatures, ResourceMD, - EntryShaderFlags, - MMDI.ShaderProfile)); + EntryFnMDNodes.emplace_back(emitEntryMD( + M, EntryProp, Signatures, ResourceMD, EntryShaderFlags, MMDI)); } NamedMDNode *EntryPointsNamedMD = diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp index 26a8728..48a9085 100644 --- a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp +++ b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp @@ -1169,8 +1169,8 @@ void DXILBitcodeWriter::writeModuleInfo() { // We need to hardcode a triple and datalayout that's compatible with the // historical DXIL triple and datalayout from DXC. StringRef Triple = "dxil-ms-dx"; - StringRef DL = "e-m:e-p:32:32-i1:8-i8:8-i16:32-i32:32-i64:64-" - "f16:32-f32:32-f64:64-n8:16:32:64"; + StringRef DL = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-" + "f16:16-f32:32-f64:64-n8:16:32:64"; writeStringRecord(Stream, bitc::MODULE_CODE_TRIPLE, Triple, 0 /*TODO*/); writeStringRecord(Stream, bitc::MODULE_CODE_DATALAYOUT, DL, 0 /*TODO*/); diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.h b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.h index 8707b08..7cbc092e 100644 --- a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.h +++ b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.h @@ -18,9 +18,7 @@ #include "llvm/MC/StringTableBuilder.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/MemoryBufferRef.h" -#include <map> #include <memory> -#include <string> #include <vector> namespace llvm { diff --git a/llvm/lib/Target/DirectX/DirectX.td b/llvm/lib/Target/DirectX/DirectX.td index 4d1d45b..1717d53 100644 --- a/llvm/lib/Target/DirectX/DirectX.td +++ b/llvm/lib/Target/DirectX/DirectX.td @@ -22,6 +22,8 @@ include "DXILStubs.td" // DirectX Subtarget features. //===----------------------------------------------------------------------===// +defm : RemapAllTargetPseudoPointerOperands<DXILClass>; + def DirectXInstrInfo : InstrInfo; //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/DirectX/DirectXInstrInfo.cpp b/llvm/lib/Target/DirectX/DirectXInstrInfo.cpp index bb2efa4..401881d 100644 --- a/llvm/lib/Target/DirectX/DirectXInstrInfo.cpp +++ b/llvm/lib/Target/DirectX/DirectXInstrInfo.cpp @@ -19,6 +19,6 @@ using namespace llvm; DirectXInstrInfo::DirectXInstrInfo(const DirectXSubtarget &STI) - : DirectXGenInstrInfo(STI) {} + : DirectXGenInstrInfo(STI, RI) {} DirectXInstrInfo::~DirectXInstrInfo() {} diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp index 84b1a31..fae9cbf 100644 --- a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp +++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp @@ -110,9 +110,9 @@ public: void addCodeGenPrepare() override { addPass(createDXILFinalizeLinkageLegacyPass()); addPass(createGlobalDCEPass()); + addPass(createDXILCBufferAccessLegacyPass()); addPass(createDXILResourceAccessLegacyPass()); addPass(createDXILIntrinsicExpansionLegacyPass()); - addPass(createDXILCBufferAccessLegacyPass()); addPass(createDXILDataScalarizationLegacyPass()); ScalarizerPassOptions DxilScalarOptions; DxilScalarOptions.ScalarizeLoadStore = true; @@ -206,7 +206,7 @@ DirectXTargetMachine::getTargetTransformInfo(const Function &F) const { DirectXTargetLowering::DirectXTargetLowering(const DirectXTargetMachine &TM, const DirectXSubtarget &STI) - : TargetLowering(TM) { + : TargetLowering(TM, STI) { addRegisterClass(MVT::i32, &dxil::DXILClassRegClass); computeRegisterProperties(STI.getRegisterInfo()); } diff --git a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp index 60dfd96..a755dd5 100644 --- a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp +++ b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp @@ -29,11 +29,12 @@ bool DirectXTTIImpl::isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx) const { switch (ID) { case Intrinsic::dx_asdouble: - case Intrinsic::dx_isinf: - case Intrinsic::dx_isnan: case Intrinsic::dx_firstbitlow: - case Intrinsic::dx_firstbituhigh: case Intrinsic::dx_firstbitshigh: + case Intrinsic::dx_firstbituhigh: + case Intrinsic::dx_isinf: + case Intrinsic::dx_isnan: + case Intrinsic::dx_legacyf16tof32: return OpdIdx == 0; default: return OpdIdx == -1; @@ -50,6 +51,7 @@ bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable( case Intrinsic::dx_frac: case Intrinsic::dx_isinf: case Intrinsic::dx_isnan: + case Intrinsic::dx_legacyf16tof32: case Intrinsic::dx_rsqrt: case Intrinsic::dx_saturate: case Intrinsic::dx_splitdouble: @@ -62,6 +64,8 @@ bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable( case Intrinsic::dx_wave_reduce_usum: case Intrinsic::dx_imad: case Intrinsic::dx_umad: + case Intrinsic::dx_ddx_coarse: + case Intrinsic::dx_ddy_coarse: return true; default: return false; |
