diff options
Diffstat (limited to 'llvm/lib/Target/DirectX/DXILOpLowering.cpp')
-rw-r--r-- | llvm/lib/Target/DirectX/DXILOpLowering.cpp | 82 |
1 files changed, 56 insertions, 26 deletions
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp index f43815b..0c245c1 100644 --- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp +++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp @@ -616,7 +616,10 @@ public: return false; } - [[nodiscard]] bool lowerTypedBufferStore(Function &F) { + [[nodiscard]] bool lowerBufferStore(Function &F, bool IsRaw) { + Triple TT(Triple(M.getTargetTriple())); + VersionTuple DXILVersion = TT.getDXILVersion(); + const DataLayout &DL = F.getDataLayout(); IRBuilder<> &IRB = OpBuilder.getIRB(); Type *Int8Ty = IRB.getInt8Ty(); Type *Int32Ty = IRB.getInt32Ty(); @@ -627,51 +630,75 @@ public: Value *Handle = createTmpHandleCast(CI->getArgOperand(0), OpBuilder.getHandleType()); Value *Index0 = CI->getArgOperand(1); - Value *Index1 = UndefValue::get(Int32Ty); - // For typed stores, the mask must always cover all four elements. - Constant *Mask = ConstantInt::get(Int8Ty, 0xF); + Value *Index1 = IsRaw ? CI->getArgOperand(2) : UndefValue::get(Int32Ty); + + Value *Data = CI->getArgOperand(IsRaw ? 3 : 2); + Type *DataTy = Data->getType(); + Type *ScalarTy = DataTy->getScalarType(); - Value *Data = CI->getArgOperand(2); - auto *DataTy = dyn_cast<FixedVectorType>(Data->getType()); - if (!DataTy || DataTy->getNumElements() != 4) + uint64_t NumElements = + DL.getTypeSizeInBits(DataTy) / DL.getTypeSizeInBits(ScalarTy); + Value *Mask = ConstantInt::get(Int8Ty, ~(~0U << NumElements)); + + // TODO: check that we only have vector or scalar... + if (!IsRaw && NumElements != 4) return make_error<StringError>( "typedBufferStore data must be a vector of 4 elements", inconvertibleErrorCode()); + else if (NumElements > 4) + return make_error<StringError>( + "rawBufferStore data must have at most 4 elements", + inconvertibleErrorCode()); - // Since we're post-scalarizer, we likely have a vector that's constructed - // solely for the argument of the store. If so, just use the scalar values - // from before they're inserted into the temporary. std::array<Value *, 4> DataElements{nullptr, nullptr, nullptr, nullptr}; - auto *IEI = dyn_cast<InsertElementInst>(Data); - while (IEI) { - auto *IndexOp = dyn_cast<ConstantInt>(IEI->getOperand(2)); - if (!IndexOp) - break; - size_t IndexVal = IndexOp->getZExtValue(); - assert(IndexVal < 4 && "Too many elements for buffer store"); - DataElements[IndexVal] = IEI->getOperand(1); - IEI = dyn_cast<InsertElementInst>(IEI->getOperand(0)); + if (DataTy == ScalarTy) + DataElements[0] = Data; + else { + // Since we're post-scalarizer, if we see a vector here it's likely + // constructed solely for the argument of the store. Just use the scalar + // values from before they're inserted into the temporary. + auto *IEI = dyn_cast<InsertElementInst>(Data); + while (IEI) { + auto *IndexOp = dyn_cast<ConstantInt>(IEI->getOperand(2)); + if (!IndexOp) + break; + size_t IndexVal = IndexOp->getZExtValue(); + assert(IndexVal < 4 && "Too many elements for buffer store"); + DataElements[IndexVal] = IEI->getOperand(1); + IEI = dyn_cast<InsertElementInst>(IEI->getOperand(0)); + } } // If for some reason we weren't able to forward the arguments from the - // scalarizer artifact, then we need to actually extract elements from the - // vector. - for (int I = 0, E = 4; I != E; ++I) + // scalarizer artifact, then we may need to actually extract elements from + // the vector. + for (int I = 0, E = NumElements; I < E; ++I) if (DataElements[I] == nullptr) DataElements[I] = IRB.CreateExtractElement(Data, ConstantInt::get(Int32Ty, I)); + // For any elements beyond the length of the vector, fill up with undef. + for (int I = NumElements, E = 4; I < E; ++I) + if (DataElements[I] == nullptr) + DataElements[I] = UndefValue::get(ScalarTy); - std::array<Value *, 8> Args{ + dxil::OpCode Op = OpCode::BufferStore; + SmallVector<Value *, 9> Args{ Handle, Index0, Index1, DataElements[0], DataElements[1], DataElements[2], DataElements[3], Mask}; + if (IsRaw && DXILVersion >= VersionTuple(1, 2)) { + Op = OpCode::RawBufferStore; + // RawBufferStore requires the alignment + Args.push_back( + ConstantInt::get(Int32Ty, DL.getPrefTypeAlign(ScalarTy).value())); + } Expected<CallInst *> OpCall = - OpBuilder.tryCreateOp(OpCode::BufferStore, Args, CI->getName()); + OpBuilder.tryCreateOp(Op, Args, CI->getName()); if (Error E = OpCall.takeError()) return E; CI->eraseFromParent(); // Clean up any leftover `insertelement`s - IEI = dyn_cast<InsertElementInst>(Data); + auto *IEI = dyn_cast<InsertElementInst>(Data); while (IEI && IEI->use_empty()) { InsertElementInst *Tmp = IEI; IEI = dyn_cast<InsertElementInst>(IEI->getOperand(0)); @@ -776,11 +803,14 @@ public: HasErrors |= lowerTypedBufferLoad(F, /*HasCheckBit=*/true); break; case Intrinsic::dx_resource_store_typedbuffer: - HasErrors |= lowerTypedBufferStore(F); + HasErrors |= lowerBufferStore(F, /*IsRaw=*/false); break; case Intrinsic::dx_resource_load_rawbuffer: HasErrors |= lowerRawBufferLoad(F); break; + case Intrinsic::dx_resource_store_rawbuffer: + HasErrors |= lowerBufferStore(F, /*IsRaw=*/true); + break; case Intrinsic::dx_resource_updatecounter: HasErrors |= lowerUpdateCounter(F); break; |