Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp                |   6
-rw-r--r--  llvm/lib/Frontend/HLSL/HLSLRootSignature.cpp              |   9
-rw-r--r--  llvm/lib/Frontend/HLSL/RootSignatureMetadata.cpp          |  14
-rw-r--r--  llvm/lib/Frontend/HLSL/RootSignatureValidations.cpp       |   2
-rw-r--r--  llvm/lib/ObjectYAML/DXContainerYAML.cpp                   |   2
-rw-r--r--  llvm/lib/Support/Path.cpp                                 | 100
-rw-r--r--  llvm/lib/Support/ScopedPrinter.cpp                        |  17
-rw-r--r--  llvm/lib/Support/VirtualFileSystem.cpp                    |   4
-rw-r--r--  llvm/lib/Target/AMDGPU/VOP1Instructions.td                |  11
-rw-r--r--  llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp        |   4
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp  |  29
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp           | 110
-rw-r--r--  llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp           |  44
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlan.cpp                   |  10
14 files changed, 199 insertions, 163 deletions
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 701a6a2..11efe49 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -473,11 +473,9 @@ bool AsmPrinter::doInitialization(Module &M) {
   AddrLabelSymbols = nullptr;
 
   // Initialize TargetLoweringObjectFile.
-  const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
-    .Initialize(OutContext, TM);
+  TM.getObjFileLowering()->Initialize(OutContext, TM);
 
-  const_cast<TargetLoweringObjectFile &>(getObjFileLowering())
-    .getModuleMetadata(M);
+  TM.getObjFileLowering()->getModuleMetadata(M);
 
   // On AIX, we delay emitting any section information until
   // after emitting the .file pseudo-op. This allows additional
diff --git a/llvm/lib/Frontend/HLSL/HLSLRootSignature.cpp b/llvm/lib/Frontend/HLSL/HLSLRootSignature.cpp
index 92c62b8..2b33e56 100644
--- a/llvm/lib/Frontend/HLSL/HLSLRootSignature.cpp
+++ b/llvm/lib/Frontend/HLSL/HLSLRootSignature.cpp
@@ -113,6 +113,13 @@ static raw_ostream &operator<<(raw_ostream &OS,
   return OS;
 }
 
+static raw_ostream &operator<<(raw_ostream &OS,
+                               const llvm::dxbc::StaticSamplerFlags &Flags) {
+  printFlags(OS, Flags, dxbc::getStaticSamplerFlags());
+
+  return OS;
+}
+
 raw_ostream &operator<<(raw_ostream &OS, const dxbc::RootFlags &Flags) {
   OS << "RootFlags(";
   printFlags(OS, Flags, dxbc::getRootFlags());
@@ -172,7 +179,7 @@ raw_ostream &operator<<(raw_ostream &OS, const StaticSampler &Sampler) {
      << ", borderColor = " << Sampler.BorderColor
      << ", minLOD = " << Sampler.MinLOD << ", maxLOD = " << Sampler.MaxLOD
      << ", space = " << Sampler.Space << ", visibility = " << Sampler.Visibility
-     << ")";
+     << ", flags = " << Sampler.Flags << ")";
   return OS;
 }
diff --git a/llvm/lib/Frontend/HLSL/RootSignatureMetadata.cpp b/llvm/lib/Frontend/HLSL/RootSignatureMetadata.cpp
index 5785505..7a0cf40 100644
--- a/llvm/lib/Frontend/HLSL/RootSignatureMetadata.cpp
+++ b/llvm/lib/Frontend/HLSL/RootSignatureMetadata.cpp
@@ -218,6 +218,7 @@ MDNode *MetadataBuilder::BuildStaticSampler(const StaticSampler &Sampler) {
       ConstantAsMetadata::get(Builder.getInt32(Sampler.Space)),
       ConstantAsMetadata::get(
          Builder.getInt32(to_underlying(Sampler.Visibility))),
+      ConstantAsMetadata::get(Builder.getInt32(to_underlying(Sampler.Flags))),
   };
   return MDNode::get(Ctx, Operands);
 }
@@ -417,7 +418,7 @@ Error MetadataParser::parseDescriptorTable(mcdxbc::RootSignatureDesc &RSD,
 
 Error MetadataParser::parseStaticSampler(mcdxbc::RootSignatureDesc &RSD,
                                          MDNode *StaticSamplerNode) {
-  if (StaticSamplerNode->getNumOperands() != 14)
+  if (StaticSamplerNode->getNumOperands() != 15)
     return make_error<InvalidRSMetadataFormat>("Static Sampler");
 
   mcdxbc::StaticSampler Sampler;
@@ -501,6 +502,17 @@ Error MetadataParser::parseStaticSampler(mcdxbc::RootSignatureDesc &RSD,
     return Error(std::move(E));
   Sampler.ShaderVisibility = *Visibility;
 
+  if (RSD.Version < 3) {
+    RSD.StaticSamplers.push_back(Sampler);
+    return Error::success();
+  }
+  assert(RSD.Version >= 3);
+
+  if (std::optional<uint32_t> Val = extractMdIntValue(StaticSamplerNode, 14))
+    Sampler.Flags = *Val;
+  else
+    return make_error<InvalidRSMetadataValue>("Static Sampler Flags");
+
   RSD.StaticSamplers.push_back(Sampler);
   return Error::success();
 }
diff --git a/llvm/lib/Frontend/HLSL/RootSignatureValidations.cpp b/llvm/lib/Frontend/HLSL/RootSignatureValidations.cpp
index 2c78d62..8a2b03d 100644
--- a/llvm/lib/Frontend/HLSL/RootSignatureValidations.cpp
+++ b/llvm/lib/Frontend/HLSL/RootSignatureValidations.cpp
@@ -40,7 +40,7 @@ bool verifyRootDescriptorFlag(uint32_t Version, uint32_t FlagsVal) {
   if (Version == 1)
     return Flags == FlagT::DataVolatile;
 
-  assert(Version == 2 && "Provided invalid root signature version");
+  assert((Version <= 3) && "Provided invalid root signature version");
 
   // The data-specific flags are mutually exclusive.
   FlagT DataFlags = FlagT::DataVolatile | FlagT::DataStatic |
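Note on the metadata layout above: a version 3 static sampler node carries 15 operands, with the new flags value at index 14. A minimal sketch of reading that operand through the generic Metadata API; readStaticSamplerFlags is an illustrative stand-in for the parser's extractMdIntValue helper, not actual LLVM code:

  #include "llvm/IR/Constants.h"
  #include "llvm/IR/Metadata.h"
  #include <cstdint>
  #include <optional>

  // Hypothetical reader for the version >= 3 static sampler flags operand.
  std::optional<uint32_t> readStaticSamplerFlags(const llvm::MDNode *N) {
    if (N->getNumOperands() != 15) // v3 static sampler nodes have 15 operands
      return std::nullopt;
    if (auto *CI =
            llvm::mdconst::dyn_extract<llvm::ConstantInt>(N->getOperand(14)))
      return static_cast<uint32_t>(CI->getZExtValue());
    return std::nullopt;
  }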
diff --git a/llvm/lib/ObjectYAML/DXContainerYAML.cpp b/llvm/lib/ObjectYAML/DXContainerYAML.cpp
index 3c09ae4..5dff9ba 100644
--- a/llvm/lib/ObjectYAML/DXContainerYAML.cpp
+++ b/llvm/lib/ObjectYAML/DXContainerYAML.cpp
@@ -154,7 +154,7 @@ DXContainerYAML::RootSignatureYamlDesc::create(
       if (Error E = readDescriptorRanges<dxbc::RTS0::v1::DescriptorRange>(
               Header, RootSigDesc, DTV))
         return std::move(E);
-    } else if (Version == 2) {
+    } else if (Version == 2 || Version == 3) {
       if (Error E = readDescriptorRanges<dxbc::RTS0::v2::DescriptorRange>(
               Header, RootSigDesc, DTV))
         return std::move(E);
diff --git a/llvm/lib/Support/Path.cpp b/llvm/lib/Support/Path.cpp
index 761d29e..3e06666 100644
--- a/llvm/lib/Support/Path.cpp
+++ b/llvm/lib/Support/Path.cpp
@@ -700,6 +700,55 @@ bool is_relative(const Twine &path, Style style) {
   return !is_absolute(path, style);
 }
 
+void make_absolute(const Twine &current_directory,
+                   SmallVectorImpl<char> &path) {
+  StringRef p(path.data(), path.size());
+
+  bool rootDirectory = has_root_directory(p);
+  bool rootName = has_root_name(p);
+
+  // Already absolute.
+  if ((rootName || is_style_posix(Style::native)) && rootDirectory)
+    return;
+
+  // All the following conditions will need the current directory.
+  SmallString<128> current_dir;
+  current_directory.toVector(current_dir);
+
+  // Relative path. Prepend the current directory.
+  if (!rootName && !rootDirectory) {
+    // Append path to the current directory.
+    append(current_dir, p);
+    // Set path to the result.
+    path.swap(current_dir);
+    return;
+  }
+
+  if (!rootName && rootDirectory) {
+    StringRef cdrn = root_name(current_dir);
+    SmallString<128> curDirRootName(cdrn.begin(), cdrn.end());
+    append(curDirRootName, p);
+    // Set path to the result.
+    path.swap(curDirRootName);
+    return;
+  }
+
+  if (rootName && !rootDirectory) {
+    StringRef pRootName = root_name(p);
+    StringRef bRootDirectory = root_directory(current_dir);
+    StringRef bRelativePath = relative_path(current_dir);
+    StringRef pRelativePath = relative_path(p);
+
+    SmallString<128> res;
+    append(res, pRootName, bRootDirectory, bRelativePath, pRelativePath);
+    path.swap(res);
+    return;
+  }
+
+  llvm_unreachable("All rootName and rootDirectory combinations should have "
+                   "occurred above!");
+}
+
 StringRef remove_leading_dotslash(StringRef Path, Style style) {
   // Remove leading "./" (or ".//" or "././" etc.)
   while (Path.size() > 2 && Path[0] == '.' && is_separator(Path[1], style)) {
@@ -903,55 +952,6 @@ getPotentiallyUniqueTempFileName(const Twine &Prefix, StringRef Suffix,
   return createTemporaryFile(Prefix, Suffix, Dummy, ResultPath, FS_Name);
 }
 
-void make_absolute(const Twine &current_directory,
-                   SmallVectorImpl<char> &path) {
-  StringRef p(path.data(), path.size());
-
-  bool rootDirectory = path::has_root_directory(p);
-  bool rootName = path::has_root_name(p);
-
-  // Already absolute.
-  if ((rootName || is_style_posix(Style::native)) && rootDirectory)
-    return;
-
-  // All of the following conditions will need the current directory.
-  SmallString<128> current_dir;
-  current_directory.toVector(current_dir);
-
-  // Relative path. Prepend the current directory.
-  if (!rootName && !rootDirectory) {
-    // Append path to the current directory.
-    path::append(current_dir, p);
-    // Set path to the result.
-    path.swap(current_dir);
-    return;
-  }
-
-  if (!rootName && rootDirectory) {
-    StringRef cdrn = path::root_name(current_dir);
-    SmallString<128> curDirRootName(cdrn.begin(), cdrn.end());
-    path::append(curDirRootName, p);
-    // Set path to the result.
-    path.swap(curDirRootName);
-    return;
-  }
-
-  if (rootName && !rootDirectory) {
-    StringRef pRootName = path::root_name(p);
-    StringRef bRootDirectory = path::root_directory(current_dir);
-    StringRef bRelativePath = path::relative_path(current_dir);
-    StringRef pRelativePath = path::relative_path(p);
-
-    SmallString<128> res;
-    path::append(res, pRootName, bRootDirectory, bRelativePath, pRelativePath);
-    path.swap(res);
-    return;
-  }
-
-  llvm_unreachable("All rootName and rootDirectory combinations should have "
-                   "occurred above!");
-}
-
 std::error_code make_absolute(SmallVectorImpl<char> &path) {
   if (path::is_absolute(path))
     return {};
@@ -960,7 +960,7 @@ std::error_code make_absolute(SmallVectorImpl<char> &path) {
   if (std::error_code ec = current_path(current_dir))
     return ec;
 
-  make_absolute(current_dir, path);
+  path::make_absolute(current_dir, path);
 
   return {};
 }
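With the relocated function now spelled llvm::sys::path::make_absolute, taking an explicit base directory and doing purely lexical work (no filesystem access), call sites qualify it with path::, as the VirtualFileSystem hunks below show. A minimal usage sketch; the directory and file names are arbitrary examples:

  #include "llvm/ADT/SmallString.h"
  #include "llvm/Support/Path.h"

  void demo() {
    llvm::SmallString<128> P("subdir/file.txt");
    // Resolve the relative path against an explicit base directory.
    llvm::sys::path::make_absolute("/home/user", P);
    // P now holds "/home/user/subdir/file.txt" (POSIX-style path).
  }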
diff --git a/llvm/lib/Support/ScopedPrinter.cpp b/llvm/lib/Support/ScopedPrinter.cpp
index a17e397..efb6178 100644
--- a/llvm/lib/Support/ScopedPrinter.cpp
+++ b/llvm/lib/Support/ScopedPrinter.cpp
@@ -1,12 +1,17 @@
-#include "llvm/Support/ScopedPrinter.h"
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
 
+#include "llvm/Support/ScopedPrinter.h"
 #include "llvm/Support/Format.h"
 
-using namespace llvm::support;
+using namespace llvm;
 
-namespace llvm {
-
-raw_ostream &operator<<(raw_ostream &OS, const HexNumber &Value) {
+raw_ostream &llvm::operator<<(raw_ostream &OS, const HexNumber &Value) {
   OS << "0x" << utohexstr(Value.Value);
   return OS;
 }
@@ -45,5 +50,3 @@ JSONScopedPrinter::JSONScopedPrinter(
   if (this->OuterScope)
     this->OuterScope->setPrinter(*this);
 }
-
-} // namespace llvm
diff --git a/llvm/lib/Support/VirtualFileSystem.cpp b/llvm/lib/Support/VirtualFileSystem.cpp
index 44d2ee7..c754b30 100644
--- a/llvm/lib/Support/VirtualFileSystem.cpp
+++ b/llvm/lib/Support/VirtualFileSystem.cpp
@@ -133,7 +133,7 @@ std::error_code FileSystem::makeAbsolute(SmallVectorImpl<char> &Path) const {
   if (!WorkingDir)
     return WorkingDir.getError();
 
-  llvm::sys::fs::make_absolute(WorkingDir.get(), Path);
+  sys::path::make_absolute(WorkingDir.get(), Path);
   return {};
 }
 
@@ -300,7 +300,7 @@ private:
     if (!WD || !*WD)
       return Path;
     Path.toVector(Storage);
-    sys::fs::make_absolute(WD->get().Resolved, Storage);
+    sys::path::make_absolute(WD->get().Resolved, Storage);
     return Storage;
   }
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 77df721..54f57e0 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -314,9 +314,10 @@ let SubtargetPredicate = HasGFX950Insts, OtherPredicates = [HasBF16ConversionIns
   defm V_CVT_F32_BF16 : VOP1Inst_t16 <"v_cvt_f32_bf16", VOP_F32_BF16>;
 }
 let SubtargetPredicate = isGFX1250Plus, OtherPredicates = [HasBF16ConversionInsts] in {
-  defm V_CVT_F32_BF16_gfx1250 : VOP1Inst_t16_with_profiles <"v_cvt_f32_bf16_gfx1250", VOP_F32_BF16,
-                                                            VOPProfile_CVT_F32_BF16_gfx1250_t16,
-                                                            VOPProfile_CVT_F32_BF16_gfx1250_fake16>;
+  let True16Predicate = UseRealTrue16Insts in
+    defm V_CVT_F32_BF16_gfx1250_t16 : VOP1Inst <"V_CVT_F32_BF16_gfx1250_t16", VOPProfile_CVT_F32_BF16_gfx1250_t16>;
+  let True16Predicate = UseFakeTrue16Insts in
+    defm V_CVT_F32_BF16_gfx1250_fake16 : VOP1Inst <"V_CVT_F32_BF16_gfx1250_fake16", VOPProfile_CVT_F32_BF16_gfx1250_fake16>;
 }
 
 let ReadsModeReg = 0, mayRaiseFPException = 0 in {
@@ -899,6 +900,7 @@ class VOP1_DPP16_Gen<bits<8> op, VOP1_DPP_Pseudo ps, GFXGen Gen, VOPProfile p =
   let DecoderNamespace = Gen.DecoderNamespace;
   let OtherPredicates = !listconcat(ps.OtherPredicates,
                                     !if(p.HasExt64BitDPP, [HasDPALU_DPP], []));
+  let True16Predicate = ps.True16Predicate;
 }
 
 class VOP1_DPP8<bits<8> op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl> :
@@ -921,6 +923,7 @@ class VOP1_DPP8_Gen<bits<8> op, VOP1_Pseudo ps, GFXGen Gen, VOPProfile p = ps.Pf
   VOP1_DPP8<op, ps, p> {
   let AssemblerPredicate = Gen.AssemblerPredicate;
   let DecoderNamespace = Gen.DecoderNamespace;
+  let True16Predicate = ps.True16Predicate;
 }
 
 //===----------------------------------------------------------------------===//
@@ -1149,7 +1152,7 @@ defm V_TANH_F16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x01f>;
 defm V_PERMLANE16_SWAP_B32 : VOP1_Real_OpSelIsDPP_gfx1250<0x049>;
 defm V_TANH_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x04a>;
 defm V_PRNG_B32 : VOP1_Real_FULL<GFX1250Gen, 0x04b>;
-defm V_CVT_F32_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x072, "v_cvt_f32_bf16", "V_CVT_F32_BF16_gfx1250">;
+defm V_CVT_F32_BF16_gfx1250 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x072, "v_cvt_f32_bf16">;
 defm V_SAT_PK4_I4_I8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x073>;
 defm V_SAT_PK4_U4_U8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x074>;
 defm V_CVT_PK_F16_FP8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x075>;
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index d4124ae..ee25f69 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -3139,8 +3139,8 @@ bool RISCVTTIImpl::isProfitableToSinkOperands(
     bool IsVPSplat = match(Op, m_Intrinsic<Intrinsic::experimental_vp_splat>(
                                    m_Value(), m_Value(), m_Value()));
     if (!IsVPSplat &&
-        !match(Op, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
-                             m_Undef(), m_ZeroMask())))
+        !match(Op, m_Shuffle(m_InsertElt(m_Value(), m_Value(), m_ZeroInt()),
+                             m_Value(), m_ZeroMask())))
       continue;
 
     // Don't sink i1 splats.
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 6ef3066..18a45c6 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -319,20 +319,20 @@ Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) {
   return nullptr;
 }
 
-/// Find elements of V demanded by UserInstr.
-static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr) {
+/// Find elements of V demanded by UserInstr. If returns false, we were not able
+/// to determine all elements.
+static bool findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr,
+                                         APInt &UnionUsedElts) {
   unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();
 
-  // Conservatively assume that all elements are needed.
-  APInt UsedElts(APInt::getAllOnes(VWidth));
-
   switch (UserInstr->getOpcode()) {
   case Instruction::ExtractElement: {
     ExtractElementInst *EEI = cast<ExtractElementInst>(UserInstr);
     assert(EEI->getVectorOperand() == V);
     ConstantInt *EEIIndexC = dyn_cast<ConstantInt>(EEI->getIndexOperand());
     if (EEIIndexC && EEIIndexC->getValue().ult(VWidth)) {
-      UsedElts = APInt::getOneBitSet(VWidth, EEIIndexC->getZExtValue());
+      UnionUsedElts.setBit(EEIIndexC->getZExtValue());
+      return true;
     }
     break;
   }
@@ -341,23 +341,23 @@ static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr) {
     unsigned MaskNumElts =
         cast<FixedVectorType>(UserInstr->getType())->getNumElements();
 
-    UsedElts = APInt(VWidth, 0);
-    for (unsigned i = 0; i < MaskNumElts; i++) {
-      unsigned MaskVal = Shuffle->getMaskValue(i);
+    for (auto I : llvm::seq(MaskNumElts)) {
+      unsigned MaskVal = Shuffle->getMaskValue(I);
       if (MaskVal == -1u || MaskVal >= 2 * VWidth)
         continue;
       if (Shuffle->getOperand(0) == V && (MaskVal < VWidth))
-        UsedElts.setBit(MaskVal);
+        UnionUsedElts.setBit(MaskVal);
       if (Shuffle->getOperand(1) == V &&
           ((MaskVal >= VWidth) && (MaskVal < 2 * VWidth)))
-        UsedElts.setBit(MaskVal - VWidth);
+        UnionUsedElts.setBit(MaskVal - VWidth);
     }
-    break;
+    return true;
   }
   default:
     break;
   }
-  return UsedElts;
+
+  return false;
 }
 
 /// Find union of elements of V demanded by all its users.
@@ -370,7 +370,8 @@ static APInt findDemandedEltsByAllUsers(Value *V) {
   APInt UnionUsedElts(VWidth, 0);
   for (const Use &U : V->uses()) {
     if (Instruction *I = dyn_cast<Instruction>(U.getUser())) {
-      UnionUsedElts |= findDemandedEltsBySingleUser(V, I);
+      if (!findDemandedEltsBySingleUser(V, I, UnionUsedElts))
+        return APInt::getAllOnes(VWidth);
     } else {
       UnionUsedElts = APInt::getAllOnes(VWidth);
       break;
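The InstCombineVectorOps change above reshapes the single-user analysis from returning a demanded-elements mask into accumulating into a caller-owned union and returning false for users it cannot analyze; the all-users walk then goes conservative on the first failure. A simplified, self-contained illustration of that accumulate-or-bail contract, using plain integers rather than the LLVM types:

  #include <cstdint>
  #include <vector>

  // Stand-in for findDemandedEltsBySingleUser: returns false for a user it
  // cannot analyze, otherwise ORs the lanes that user demands into Union.
  bool demandedBySingleUser(int User, unsigned Width, uint64_t &Union) {
    if (User < 0)
      return false; // opaque user, caller must go conservative
    Union |= uint64_t(1) << (unsigned(User) % Width);
    return true;
  }

  // Mirrors findDemandedEltsByAllUsers: bail out to all-ones on the first
  // unanalyzable user instead of OR-ing a conservative per-user mask.
  uint64_t demandedByAllUsers(const std::vector<int> &Users, unsigned Width) {
    uint64_t AllOnes = Width >= 64 ? ~uint64_t(0) : (uint64_t(1) << Width) - 1;
    uint64_t Union = 0;
    for (int U : Users)
      if (!demandedBySingleUser(U, Width, Union))
        return AllOnes;
    return Union;
  }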
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 12fb46d..e5d6c81 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5699,6 +5699,20 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
           Worklist.push_back(InstOp);
       }
 
+      auto UpdateMemOpUserCost = [this, VF](LoadInst *LI) {
+        // If there are direct memory op users of the newly scalarized load,
+        // their cost may have changed because there's no scalarization
+        // overhead for the operand. Update it.
+        for (User *U : LI->users()) {
+          if (!isa<LoadInst, StoreInst>(U))
+            continue;
+          if (getWideningDecision(cast<Instruction>(U), VF) != CM_Scalarize)
+            continue;
+          setWideningDecision(
+              cast<Instruction>(U), VF, CM_Scalarize,
+              getMemInstScalarizationCost(cast<Instruction>(U), VF));
+        }
+      };
       for (auto *I : AddrDefs) {
         if (isa<LoadInst>(I)) {
           // Setting the desired widening decision should ideally be handled in
@@ -5708,21 +5722,24 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
           InstWidening Decision = getWideningDecision(I, VF);
           if (Decision == CM_Widen || Decision == CM_Widen_Reverse ||
               (!isPredicatedInst(I) && !Legal->isUniformMemOp(*I, VF) &&
-               Decision == CM_Scalarize))
+               Decision == CM_Scalarize)) {
             // Scalarize a widened load of address or update the cost of a scalar
             // load of an address.
             setWideningDecision(
                 I, VF, CM_Scalarize,
                 (VF.getKnownMinValue() *
                  getMemoryInstructionCost(I, ElementCount::getFixed(1))));
-          else if (const auto *Group = getInterleavedAccessGroup(I)) {
+            UpdateMemOpUserCost(cast<LoadInst>(I));
+          } else if (const auto *Group = getInterleavedAccessGroup(I)) {
             // Scalarize an interleave group of address loads.
             for (unsigned I = 0; I < Group->getFactor(); ++I) {
-              if (Instruction *Member = Group->getMember(I))
+              if (Instruction *Member = Group->getMember(I)) {
                 setWideningDecision(
                     Member, VF, CM_Scalarize,
                     (VF.getKnownMinValue() *
                      getMemoryInstructionCost(Member, ElementCount::getFixed(1))));
+                UpdateMemOpUserCost(cast<LoadInst>(Member));
+              }
             }
           }
         } else {
@@ -9521,55 +9538,52 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop(
   VPBasicBlock *Header = VectorLoop->getEntryBasicBlock();
   Header->setName("vec.epilog.vector.body");
 
-  DenseMap<Value *, Value *> ToFrozen;
-  SmallVector<Instruction *> InstsToMove;
   // Ensure that the start values for all header phi recipes are updated before
   // vectorizing the epilogue loop.
-  for (VPRecipeBase &R : Header->phis()) {
-    if (auto *IV = dyn_cast<VPCanonicalIVPHIRecipe>(&R)) {
-      // When vectorizing the epilogue loop, the canonical induction start
-      // value needs to be changed from zero to the value after the main
-      // vector loop. Find the resume value created during execution of the main
-      // VPlan. It must be the first phi in the loop preheader.
-      // FIXME: Improve modeling for canonical IV start values in the epilogue
-      // loop.
-      using namespace llvm::PatternMatch;
-      PHINode *EPResumeVal = &*L->getLoopPreheader()->phis().begin();
-      for (Value *Inc : EPResumeVal->incoming_values()) {
-        if (match(Inc, m_SpecificInt(0)))
-          continue;
-        assert(!EPI.VectorTripCount &&
-               "Must only have a single non-zero incoming value");
-        EPI.VectorTripCount = Inc;
-      }
-      // If we didn't find a non-zero vector trip count, all incoming values
-      // must be zero, which also means the vector trip count is zero. Pick the
-      // first zero as vector trip count.
-      // TODO: We should not choose VF * UF so the main vector loop is known to
-      // be dead.
-      if (!EPI.VectorTripCount) {
-        assert(
-            EPResumeVal->getNumIncomingValues() > 0 &&
-            all_of(EPResumeVal->incoming_values(),
-                   [](Value *Inc) { return match(Inc, m_SpecificInt(0)); }) &&
-            "all incoming values must be 0");
-        EPI.VectorTripCount = EPResumeVal->getOperand(0);
-      }
-      VPValue *VPV = Plan.getOrAddLiveIn(EPResumeVal);
-      assert(all_of(IV->users(),
-                    [](const VPUser *U) {
-                      return isa<VPScalarIVStepsRecipe>(U) ||
-                             isa<VPDerivedIVRecipe>(U) ||
-                             cast<VPRecipeBase>(U)->isScalarCast() ||
-                             cast<VPInstruction>(U)->getOpcode() ==
-                                 Instruction::Add;
-                    }) &&
-             "the canonical IV should only be used by its increment or "
-             "ScalarIVSteps when resetting the start value");
-      IV->setOperand(0, VPV);
+  VPCanonicalIVPHIRecipe *IV = Plan.getCanonicalIV();
+  // When vectorizing the epilogue loop, the canonical induction start
+  // value needs to be changed from zero to the value after the main
+  // vector loop. Find the resume value created during execution of the main
+  // VPlan. It must be the first phi in the loop preheader.
+  // FIXME: Improve modeling for canonical IV start values in the epilogue
+  // loop.
+  using namespace llvm::PatternMatch;
+  PHINode *EPResumeVal = &*L->getLoopPreheader()->phis().begin();
+  for (Value *Inc : EPResumeVal->incoming_values()) {
+    if (match(Inc, m_SpecificInt(0)))
       continue;
-    }
+    assert(!EPI.VectorTripCount &&
+           "Must only have a single non-zero incoming value");
+    EPI.VectorTripCount = Inc;
+  }
+  // If we didn't find a non-zero vector trip count, all incoming values
+  // must be zero, which also means the vector trip count is zero. Pick the
+  // first zero as vector trip count.
+  // TODO: We should not choose VF * UF so the main vector loop is known to
+  // be dead.
+  if (!EPI.VectorTripCount) {
+    assert(EPResumeVal->getNumIncomingValues() > 0 &&
+           all_of(EPResumeVal->incoming_values(),
+                  [](Value *Inc) { return match(Inc, m_SpecificInt(0)); }) &&
+           "all incoming values must be 0");
+    EPI.VectorTripCount = EPResumeVal->getOperand(0);
+  }
+  VPValue *VPV = Plan.getOrAddLiveIn(EPResumeVal);
+  assert(all_of(IV->users(),
+                [](const VPUser *U) {
+                  return isa<VPScalarIVStepsRecipe>(U) ||
+                         isa<VPDerivedIVRecipe>(U) ||
+                         cast<VPRecipeBase>(U)->isScalarCast() ||
+                         cast<VPInstruction>(U)->getOpcode() ==
+                             Instruction::Add;
+                }) &&
+         "the canonical IV should only be used by its increment or "
+         "ScalarIVSteps when resetting the start value");
+  IV->setOperand(0, VPV);
+
+  DenseMap<Value *, Value *> ToFrozen;
+  SmallVector<Instruction *> InstsToMove;
+  for (VPRecipeBase &R : drop_begin(Header->phis())) {
     Value *ResumeV = nullptr;
     // TODO: Move setting of resume values to prepareToExecute.
     if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R)) {
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index f77d587..fedca65 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2241,10 +2241,9 @@ public:
   /// TODO: If load combining is allowed in the IR optimizer, this analysis
   /// may not be necessary.
   bool isLoadCombineCandidate(ArrayRef<Value *> Stores) const;
-  bool isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
-                     ArrayRef<unsigned> Order, const TargetTransformInfo &TTI,
-                     const DataLayout &DL, ScalarEvolution &SE,
-                     const int64_t Diff, StridedPtrInfo &SPtrInfo) const;
+  bool isStridedLoad(ArrayRef<Value *> PointerOps, Type *ScalarTy,
+                     Align Alignment, const int64_t Diff, Value *Ptr0,
+                     Value *PtrN, StridedPtrInfo &SPtrInfo) const;
 
   /// Checks if the given array of loads can be represented as a vectorized,
   /// scatter or just simple gather.
@@ -6824,13 +6823,10 @@ isMaskedLoadCompress(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
 /// 4. Any pointer operand is an instruction with the users outside of the
 ///    current graph (for masked gathers extra extractelement instructions
 ///    might be required).
-bool BoUpSLP::isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
-                            ArrayRef<unsigned> Order,
-                            const TargetTransformInfo &TTI,
-                            const DataLayout &DL, ScalarEvolution &SE,
-                            const int64_t Diff,
-                            StridedPtrInfo &SPtrInfo) const {
-  const size_t Sz = VL.size();
+bool BoUpSLP::isStridedLoad(ArrayRef<Value *> PointerOps, Type *ScalarTy,
+                            Align Alignment, const int64_t Diff, Value *Ptr0,
+                            Value *PtrN, StridedPtrInfo &SPtrInfo) const {
+  const size_t Sz = PointerOps.size();
   if (Diff % (Sz - 1) != 0)
     return false;
 
@@ -6842,7 +6838,6 @@ bool BoUpSLP::isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
   });
 
   const uint64_t AbsoluteDiff = std::abs(Diff);
-  Type *ScalarTy = VL.front()->getType();
   auto *VecTy = getWidenedType(ScalarTy, Sz);
   if (IsAnyPointerUsedOutGraph ||
       (AbsoluteDiff > Sz &&
@@ -6853,20 +6848,9 @@ bool BoUpSLP::isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
   int64_t Stride = Diff / static_cast<int64_t>(Sz - 1);
   if (Diff != Stride * static_cast<int64_t>(Sz - 1))
     return false;
-  Align Alignment =
-      cast<LoadInst>(Order.empty() ? VL.front() : VL[Order.front()])
-          ->getAlign();
-  if (!TTI.isLegalStridedLoadStore(VecTy, Alignment))
+  if (!TTI->isLegalStridedLoadStore(VecTy, Alignment))
     return false;
-  Value *Ptr0;
-  Value *PtrN;
-  if (Order.empty()) {
-    Ptr0 = PointerOps.front();
-    PtrN = PointerOps.back();
-  } else {
-    Ptr0 = PointerOps[Order.front()];
-    PtrN = PointerOps[Order.back()];
-  }
+
   // Iterate through all pointers and check if all distances are
   // unique multiple of Dist.
   SmallSet<int64_t, 4> Dists;
@@ -6875,14 +6859,14 @@ bool BoUpSLP::isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
     if (Ptr == PtrN)
       Dist = Diff;
     else if (Ptr != Ptr0)
-      Dist = *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, DL, SE);
+      Dist = *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, *DL, *SE);
     // If the strides are not the same or repeated, we can't
     // vectorize.
     if (((Dist / Stride) * Stride) != Dist || !Dists.insert(Dist).second)
       break;
   }
   if (Dists.size() == Sz) {
-    Type *StrideTy = DL.getIndexType(Ptr0->getType());
+    Type *StrideTy = DL->getIndexType(Ptr0->getType());
     SPtrInfo.StrideVal = ConstantInt::get(StrideTy, Stride);
     SPtrInfo.Ty = getWidenedType(ScalarTy, Sz);
     return true;
@@ -6971,7 +6955,11 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
               cast<Instruction>(V), UserIgnoreList);
         }))
       return LoadsState::CompressVectorize;
-    if (isStridedLoad(VL, PointerOps, Order, *TTI, *DL, *SE, *Diff, SPtrInfo))
+    Align Alignment =
+        cast<LoadInst>(Order.empty() ? VL.front() : VL[Order.front()])
+            ->getAlign();
+    if (isStridedLoad(PointerOps, ScalarTy, Alignment, *Diff, Ptr0, PtrN,
+                      SPtrInfo))
       return LoadsState::StridedVectorize;
   }
   if (!TTI->isLegalMaskedGather(VecTy, CommonAlignment) ||
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index ffd2e59..02eb637 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -978,6 +978,16 @@ void VPlan::execute(VPTransformState *State) {
 
   // If the original loop is unreachable, delete it and all its blocks.
   if (!ScalarPhVPBB->hasPredecessors()) {
+    // DeleteDeadBlocks will remove single-entry phis. Remove them from the exit
+    // VPIRBBs in VPlan as well, otherwise we would retain references to deleted
+    // IR instructions.
+    for (VPIRBasicBlock *EB : getExitBlocks()) {
+      for (VPRecipeBase &R : make_early_inc_range(EB->phis())) {
+        if (R.getNumOperands() == 1)
+          R.eraseFromParent();
+      }
+    }
+
     Loop *OrigLoop =
         State->LI->getLoopFor(getScalarHeader()->getIRBasicBlock());
     auto Blocks = OrigLoop->getBlocksVector();