Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp               |   6
-rw-r--r--  llvm/lib/Frontend/HLSL/HLSLRootSignature.cpp             |   9
-rw-r--r--  llvm/lib/Frontend/HLSL/RootSignatureMetadata.cpp         |  14
-rw-r--r--  llvm/lib/Frontend/HLSL/RootSignatureValidations.cpp      |   2
-rw-r--r--  llvm/lib/ObjectYAML/DXContainerYAML.cpp                  |   2
-rw-r--r--  llvm/lib/Support/Path.cpp                                | 100
-rw-r--r--  llvm/lib/Support/ScopedPrinter.cpp                       |  17
-rw-r--r--  llvm/lib/Support/VirtualFileSystem.cpp                   |   4
-rw-r--r--  llvm/lib/Target/AMDGPU/VOP1Instructions.td               |  11
-rw-r--r--  llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp       |   4
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp |  29
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp          | 110
-rw-r--r--  llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp          |  44
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlan.cpp                  |  10
14 files changed, 199 insertions, 163 deletions
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 701a6a2..11efe49 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -473,11 +473,9 @@ bool AsmPrinter::doInitialization(Module &M) {
AddrLabelSymbols = nullptr;
// Initialize TargetLoweringObjectFile.
- const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
- .Initialize(OutContext, TM);
+ TM.getObjFileLowering()->Initialize(OutContext, TM);
- const_cast<TargetLoweringObjectFile &>(getObjFileLowering())
- .getModuleMetadata(M);
+ TM.getObjFileLowering()->getModuleMetadata(M);
// On AIX, we delay emitting any section information until
// after emitting the .file pseudo-op. This allows additional
diff --git a/llvm/lib/Frontend/HLSL/HLSLRootSignature.cpp b/llvm/lib/Frontend/HLSL/HLSLRootSignature.cpp
index 92c62b8..2b33e56 100644
--- a/llvm/lib/Frontend/HLSL/HLSLRootSignature.cpp
+++ b/llvm/lib/Frontend/HLSL/HLSLRootSignature.cpp
@@ -113,6 +113,13 @@ static raw_ostream &operator<<(raw_ostream &OS,
return OS;
}
+static raw_ostream &operator<<(raw_ostream &OS,
+ const llvm::dxbc::StaticSamplerFlags &Flags) {
+ printFlags(OS, Flags, dxbc::getStaticSamplerFlags());
+
+ return OS;
+}
+
raw_ostream &operator<<(raw_ostream &OS, const dxbc::RootFlags &Flags) {
OS << "RootFlags(";
printFlags(OS, Flags, dxbc::getRootFlags());
@@ -172,7 +179,7 @@ raw_ostream &operator<<(raw_ostream &OS, const StaticSampler &Sampler) {
<< ", borderColor = " << Sampler.BorderColor
<< ", minLOD = " << Sampler.MinLOD << ", maxLOD = " << Sampler.MaxLOD
<< ", space = " << Sampler.Space << ", visibility = " << Sampler.Visibility
- << ")";
+ << ", flags = " << Sampler.Flags << ")";
return OS;
}
diff --git a/llvm/lib/Frontend/HLSL/RootSignatureMetadata.cpp b/llvm/lib/Frontend/HLSL/RootSignatureMetadata.cpp
index 5785505..7a0cf40 100644
--- a/llvm/lib/Frontend/HLSL/RootSignatureMetadata.cpp
+++ b/llvm/lib/Frontend/HLSL/RootSignatureMetadata.cpp
@@ -218,6 +218,7 @@ MDNode *MetadataBuilder::BuildStaticSampler(const StaticSampler &Sampler) {
ConstantAsMetadata::get(Builder.getInt32(Sampler.Space)),
ConstantAsMetadata::get(
Builder.getInt32(to_underlying(Sampler.Visibility))),
+ ConstantAsMetadata::get(Builder.getInt32(to_underlying(Sampler.Flags))),
};
return MDNode::get(Ctx, Operands);
}
@@ -417,7 +418,7 @@ Error MetadataParser::parseDescriptorTable(mcdxbc::RootSignatureDesc &RSD,
Error MetadataParser::parseStaticSampler(mcdxbc::RootSignatureDesc &RSD,
MDNode *StaticSamplerNode) {
- if (StaticSamplerNode->getNumOperands() != 14)
+ if (StaticSamplerNode->getNumOperands() != 15)
return make_error<InvalidRSMetadataFormat>("Static Sampler");
mcdxbc::StaticSampler Sampler;
@@ -501,6 +502,17 @@ Error MetadataParser::parseStaticSampler(mcdxbc::RootSignatureDesc &RSD,
return Error(std::move(E));
Sampler.ShaderVisibility = *Visibility;
+ if (RSD.Version < 3) {
+ RSD.StaticSamplers.push_back(Sampler);
+ return Error::success();
+ }
+ assert(RSD.Version >= 3);
+
+ if (std::optional<uint32_t> Val = extractMdIntValue(StaticSamplerNode, 14))
+ Sampler.Flags = *Val;
+ else
+ return make_error<InvalidRSMetadataValue>("Static Sampler Flags");
+
RSD.StaticSamplers.push_back(Sampler);
return Error::success();
}
diff --git a/llvm/lib/Frontend/HLSL/RootSignatureValidations.cpp b/llvm/lib/Frontend/HLSL/RootSignatureValidations.cpp
index 2c78d62..8a2b03d 100644
--- a/llvm/lib/Frontend/HLSL/RootSignatureValidations.cpp
+++ b/llvm/lib/Frontend/HLSL/RootSignatureValidations.cpp
@@ -40,7 +40,7 @@ bool verifyRootDescriptorFlag(uint32_t Version, uint32_t FlagsVal) {
if (Version == 1)
return Flags == FlagT::DataVolatile;
- assert(Version == 2 && "Provided invalid root signature version");
+ assert((Version <= 3) && "Provided invalid root signature version");
// The data-specific flags are mutually exclusive.
FlagT DataFlags = FlagT::DataVolatile | FlagT::DataStatic |
diff --git a/llvm/lib/ObjectYAML/DXContainerYAML.cpp b/llvm/lib/ObjectYAML/DXContainerYAML.cpp
index 3c09ae4..5dff9ba 100644
--- a/llvm/lib/ObjectYAML/DXContainerYAML.cpp
+++ b/llvm/lib/ObjectYAML/DXContainerYAML.cpp
@@ -154,7 +154,7 @@ DXContainerYAML::RootSignatureYamlDesc::create(
if (Error E = readDescriptorRanges<dxbc::RTS0::v1::DescriptorRange>(
Header, RootSigDesc, DTV))
return std::move(E);
- } else if (Version == 2) {
+ } else if (Version == 2 || Version == 3) {
if (Error E = readDescriptorRanges<dxbc::RTS0::v2::DescriptorRange>(
Header, RootSigDesc, DTV))
return std::move(E);
diff --git a/llvm/lib/Support/Path.cpp b/llvm/lib/Support/Path.cpp
index 761d29e..3e06666 100644
--- a/llvm/lib/Support/Path.cpp
+++ b/llvm/lib/Support/Path.cpp
@@ -700,6 +700,55 @@ bool is_relative(const Twine &path, Style style) {
return !is_absolute(path, style);
}
+void make_absolute(const Twine &current_directory,
+ SmallVectorImpl<char> &path) {
+ StringRef p(path.data(), path.size());
+
+ bool rootDirectory = has_root_directory(p);
+ bool rootName = has_root_name(p);
+
+ // Already absolute.
+ if ((rootName || is_style_posix(Style::native)) && rootDirectory)
+ return;
+
+ // All the following conditions will need the current directory.
+ SmallString<128> current_dir;
+ current_directory.toVector(current_dir);
+
+ // Relative path. Prepend the current directory.
+ if (!rootName && !rootDirectory) {
+ // Append path to the current directory.
+ append(current_dir, p);
+ // Set path to the result.
+ path.swap(current_dir);
+ return;
+ }
+
+ if (!rootName && rootDirectory) {
+ StringRef cdrn = root_name(current_dir);
+ SmallString<128> curDirRootName(cdrn.begin(), cdrn.end());
+ append(curDirRootName, p);
+ // Set path to the result.
+ path.swap(curDirRootName);
+ return;
+ }
+
+ if (rootName && !rootDirectory) {
+ StringRef pRootName = root_name(p);
+ StringRef bRootDirectory = root_directory(current_dir);
+ StringRef bRelativePath = relative_path(current_dir);
+ StringRef pRelativePath = relative_path(p);
+
+ SmallString<128> res;
+ append(res, pRootName, bRootDirectory, bRelativePath, pRelativePath);
+ path.swap(res);
+ return;
+ }
+
+ llvm_unreachable("All rootName and rootDirectory combinations should have "
+ "occurred above!");
+}
+
StringRef remove_leading_dotslash(StringRef Path, Style style) {
// Remove leading "./" (or ".//" or "././" etc.)
while (Path.size() > 2 && Path[0] == '.' && is_separator(Path[1], style)) {
@@ -903,55 +952,6 @@ getPotentiallyUniqueTempFileName(const Twine &Prefix, StringRef Suffix,
return createTemporaryFile(Prefix, Suffix, Dummy, ResultPath, FS_Name);
}
-void make_absolute(const Twine &current_directory,
- SmallVectorImpl<char> &path) {
- StringRef p(path.data(), path.size());
-
- bool rootDirectory = path::has_root_directory(p);
- bool rootName = path::has_root_name(p);
-
- // Already absolute.
- if ((rootName || is_style_posix(Style::native)) && rootDirectory)
- return;
-
- // All of the following conditions will need the current directory.
- SmallString<128> current_dir;
- current_directory.toVector(current_dir);
-
- // Relative path. Prepend the current directory.
- if (!rootName && !rootDirectory) {
- // Append path to the current directory.
- path::append(current_dir, p);
- // Set path to the result.
- path.swap(current_dir);
- return;
- }
-
- if (!rootName && rootDirectory) {
- StringRef cdrn = path::root_name(current_dir);
- SmallString<128> curDirRootName(cdrn.begin(), cdrn.end());
- path::append(curDirRootName, p);
- // Set path to the result.
- path.swap(curDirRootName);
- return;
- }
-
- if (rootName && !rootDirectory) {
- StringRef pRootName = path::root_name(p);
- StringRef bRootDirectory = path::root_directory(current_dir);
- StringRef bRelativePath = path::relative_path(current_dir);
- StringRef pRelativePath = path::relative_path(p);
-
- SmallString<128> res;
- path::append(res, pRootName, bRootDirectory, bRelativePath, pRelativePath);
- path.swap(res);
- return;
- }
-
- llvm_unreachable("All rootName and rootDirectory combinations should have "
- "occurred above!");
-}
-
std::error_code make_absolute(SmallVectorImpl<char> &path) {
if (path::is_absolute(path))
return {};
@@ -960,7 +960,7 @@ std::error_code make_absolute(SmallVectorImpl<char> &path) {
if (std::error_code ec = current_path(current_dir))
return ec;
- make_absolute(current_dir, path);
+ path::make_absolute(current_dir, path);
return {};
}
diff --git a/llvm/lib/Support/ScopedPrinter.cpp b/llvm/lib/Support/ScopedPrinter.cpp
index a17e397..efb6178 100644
--- a/llvm/lib/Support/ScopedPrinter.cpp
+++ b/llvm/lib/Support/ScopedPrinter.cpp
@@ -1,12 +1,17 @@
-#include "llvm/Support/ScopedPrinter.h"
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/Format.h"
-using namespace llvm::support;
+using namespace llvm;
-namespace llvm {
-
-raw_ostream &operator<<(raw_ostream &OS, const HexNumber &Value) {
+raw_ostream &llvm::operator<<(raw_ostream &OS, const HexNumber &Value) {
OS << "0x" << utohexstr(Value.Value);
return OS;
}
@@ -45,5 +50,3 @@ JSONScopedPrinter::JSONScopedPrinter(
if (this->OuterScope)
this->OuterScope->setPrinter(*this);
}
-
-} // namespace llvm
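Likewise, a tiny sketch of the HexNumber stream operator whose definition moves out of the namespace block above (the demo function is hypothetical):

#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/raw_ostream.h"

static void demoHexNumber() {
  // operator<< prefixes "0x" and formats the value with utohexstr,
  // so this should print "0xFF".
  llvm::outs() << llvm::HexNumber(255) << "\n";
}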
diff --git a/llvm/lib/Support/VirtualFileSystem.cpp b/llvm/lib/Support/VirtualFileSystem.cpp
index 44d2ee7..c754b30 100644
--- a/llvm/lib/Support/VirtualFileSystem.cpp
+++ b/llvm/lib/Support/VirtualFileSystem.cpp
@@ -133,7 +133,7 @@ std::error_code FileSystem::makeAbsolute(SmallVectorImpl<char> &Path) const {
if (!WorkingDir)
return WorkingDir.getError();
- llvm::sys::fs::make_absolute(WorkingDir.get(), Path);
+ sys::path::make_absolute(WorkingDir.get(), Path);
return {};
}
@@ -300,7 +300,7 @@ private:
if (!WD || !*WD)
return Path;
Path.toVector(Storage);
- sys::fs::make_absolute(WD->get().Resolved, Storage);
+ sys::path::make_absolute(WD->get().Resolved, Storage);
return Storage;
}
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 77df721..54f57e0 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -314,9 +314,10 @@ let SubtargetPredicate = HasGFX950Insts, OtherPredicates = [HasBF16ConversionIns
defm V_CVT_F32_BF16 : VOP1Inst_t16 <"v_cvt_f32_bf16", VOP_F32_BF16>;
}
let SubtargetPredicate = isGFX1250Plus, OtherPredicates = [HasBF16ConversionInsts] in {
- defm V_CVT_F32_BF16_gfx1250 : VOP1Inst_t16_with_profiles <"v_cvt_f32_bf16_gfx1250", VOP_F32_BF16,
- VOPProfile_CVT_F32_BF16_gfx1250_t16,
- VOPProfile_CVT_F32_BF16_gfx1250_fake16>;
+ let True16Predicate = UseRealTrue16Insts in
+ defm V_CVT_F32_BF16_gfx1250_t16 : VOP1Inst <"V_CVT_F32_BF16_gfx1250_t16", VOPProfile_CVT_F32_BF16_gfx1250_t16>;
+ let True16Predicate = UseFakeTrue16Insts in
+ defm V_CVT_F32_BF16_gfx1250_fake16 : VOP1Inst <"V_CVT_F32_BF16_gfx1250_fake16", VOPProfile_CVT_F32_BF16_gfx1250_fake16>;
}
let ReadsModeReg = 0, mayRaiseFPException = 0 in {
@@ -899,6 +900,7 @@ class VOP1_DPP16_Gen<bits<8> op, VOP1_DPP_Pseudo ps, GFXGen Gen, VOPProfile p =
let DecoderNamespace = Gen.DecoderNamespace;
let OtherPredicates = !listconcat(ps.OtherPredicates,
!if(p.HasExt64BitDPP, [HasDPALU_DPP], []));
+ let True16Predicate = ps.True16Predicate;
}
class VOP1_DPP8<bits<8> op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl> :
@@ -921,6 +923,7 @@ class VOP1_DPP8_Gen<bits<8> op, VOP1_Pseudo ps, GFXGen Gen, VOPProfile p = ps.Pf
VOP1_DPP8<op, ps, p> {
let AssemblerPredicate = Gen.AssemblerPredicate;
let DecoderNamespace = Gen.DecoderNamespace;
+ let True16Predicate = ps.True16Predicate;
}
//===----------------------------------------------------------------------===//
@@ -1149,7 +1152,7 @@ defm V_TANH_F16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x01f>;
defm V_PERMLANE16_SWAP_B32 : VOP1_Real_OpSelIsDPP_gfx1250<0x049>;
defm V_TANH_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x04a>;
defm V_PRNG_B32 : VOP1_Real_FULL<GFX1250Gen, 0x04b>;
-defm V_CVT_F32_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x072, "v_cvt_f32_bf16", "V_CVT_F32_BF16_gfx1250">;
+defm V_CVT_F32_BF16_gfx1250 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x072, "v_cvt_f32_bf16">;
defm V_SAT_PK4_I4_I8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x073>;
defm V_SAT_PK4_U4_U8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x074>;
defm V_CVT_PK_F16_FP8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x075>;
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index d4124ae..ee25f69 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -3139,8 +3139,8 @@ bool RISCVTTIImpl::isProfitableToSinkOperands(
bool IsVPSplat = match(Op, m_Intrinsic<Intrinsic::experimental_vp_splat>(
m_Value(), m_Value(), m_Value()));
if (!IsVPSplat &&
- !match(Op, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
- m_Undef(), m_ZeroMask())))
+ !match(Op, m_Shuffle(m_InsertElt(m_Value(), m_Value(), m_ZeroInt()),
+ m_Value(), m_ZeroMask())))
continue;
// Don't sink i1 splats.
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 6ef3066..18a45c6 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -319,20 +319,20 @@ Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) {
return nullptr;
}
-/// Find elements of V demanded by UserInstr.
-static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr) {
+/// Find elements of V demanded by UserInstr. Returns false if we were not
+/// able to determine all elements.
+static bool findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr,
+ APInt &UnionUsedElts) {
unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();
- // Conservatively assume that all elements are needed.
- APInt UsedElts(APInt::getAllOnes(VWidth));
-
switch (UserInstr->getOpcode()) {
case Instruction::ExtractElement: {
ExtractElementInst *EEI = cast<ExtractElementInst>(UserInstr);
assert(EEI->getVectorOperand() == V);
ConstantInt *EEIIndexC = dyn_cast<ConstantInt>(EEI->getIndexOperand());
if (EEIIndexC && EEIIndexC->getValue().ult(VWidth)) {
- UsedElts = APInt::getOneBitSet(VWidth, EEIIndexC->getZExtValue());
+ UnionUsedElts.setBit(EEIIndexC->getZExtValue());
+ return true;
}
break;
}
@@ -341,23 +341,23 @@ static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr) {
unsigned MaskNumElts =
cast<FixedVectorType>(UserInstr->getType())->getNumElements();
- UsedElts = APInt(VWidth, 0);
- for (unsigned i = 0; i < MaskNumElts; i++) {
- unsigned MaskVal = Shuffle->getMaskValue(i);
+ for (auto I : llvm::seq(MaskNumElts)) {
+ unsigned MaskVal = Shuffle->getMaskValue(I);
if (MaskVal == -1u || MaskVal >= 2 * VWidth)
continue;
if (Shuffle->getOperand(0) == V && (MaskVal < VWidth))
- UsedElts.setBit(MaskVal);
+ UnionUsedElts.setBit(MaskVal);
if (Shuffle->getOperand(1) == V &&
((MaskVal >= VWidth) && (MaskVal < 2 * VWidth)))
- UsedElts.setBit(MaskVal - VWidth);
+ UnionUsedElts.setBit(MaskVal - VWidth);
}
- break;
+ return true;
}
default:
break;
}
- return UsedElts;
+
+ return false;
}
/// Find union of elements of V demanded by all its users.
@@ -370,7 +370,8 @@ static APInt findDemandedEltsByAllUsers(Value *V) {
APInt UnionUsedElts(VWidth, 0);
for (const Use &U : V->uses()) {
if (Instruction *I = dyn_cast<Instruction>(U.getUser())) {
- UnionUsedElts |= findDemandedEltsBySingleUser(V, I);
+ if (!findDemandedEltsBySingleUser(V, I, UnionUsedElts))
+ return APInt::getAllOnes(VWidth);
} else {
UnionUsedElts = APInt::getAllOnes(VWidth);
break;
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 12fb46d..e5d6c81 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5699,6 +5699,20 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
Worklist.push_back(InstOp);
}
+ auto UpdateMemOpUserCost = [this, VF](LoadInst *LI) {
+ // If there are direct memory op users of the newly scalarized load,
+ // their cost may have changed because there's no scalarization
+ // overhead for the operand. Update it.
+ for (User *U : LI->users()) {
+ if (!isa<LoadInst, StoreInst>(U))
+ continue;
+ if (getWideningDecision(cast<Instruction>(U), VF) != CM_Scalarize)
+ continue;
+ setWideningDecision(
+ cast<Instruction>(U), VF, CM_Scalarize,
+ getMemInstScalarizationCost(cast<Instruction>(U), VF));
+ }
+ };
for (auto *I : AddrDefs) {
if (isa<LoadInst>(I)) {
// Setting the desired widening decision should ideally be handled in
@@ -5708,21 +5722,24 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
InstWidening Decision = getWideningDecision(I, VF);
if (Decision == CM_Widen || Decision == CM_Widen_Reverse ||
(!isPredicatedInst(I) && !Legal->isUniformMemOp(*I, VF) &&
- Decision == CM_Scalarize))
+ Decision == CM_Scalarize)) {
// Scalarize a widened load of address or update the cost of a scalar
// load of an address.
setWideningDecision(
I, VF, CM_Scalarize,
(VF.getKnownMinValue() *
getMemoryInstructionCost(I, ElementCount::getFixed(1))));
- else if (const auto *Group = getInterleavedAccessGroup(I)) {
+ UpdateMemOpUserCost(cast<LoadInst>(I));
+ } else if (const auto *Group = getInterleavedAccessGroup(I)) {
// Scalarize an interleave group of address loads.
for (unsigned I = 0; I < Group->getFactor(); ++I) {
- if (Instruction *Member = Group->getMember(I))
+ if (Instruction *Member = Group->getMember(I)) {
setWideningDecision(
Member, VF, CM_Scalarize,
(VF.getKnownMinValue() *
getMemoryInstructionCost(Member, ElementCount::getFixed(1))));
+ UpdateMemOpUserCost(cast<LoadInst>(Member));
+ }
}
}
} else {
@@ -9521,55 +9538,52 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop(
VPBasicBlock *Header = VectorLoop->getEntryBasicBlock();
Header->setName("vec.epilog.vector.body");
- DenseMap<Value *, Value *> ToFrozen;
- SmallVector<Instruction *> InstsToMove;
// Ensure that the start values for all header phi recipes are updated before
// vectorizing the epilogue loop.
- for (VPRecipeBase &R : Header->phis()) {
- if (auto *IV = dyn_cast<VPCanonicalIVPHIRecipe>(&R)) {
- // When vectorizing the epilogue loop, the canonical induction start
- // value needs to be changed from zero to the value after the main
- // vector loop. Find the resume value created during execution of the main
- // VPlan. It must be the first phi in the loop preheader.
- // FIXME: Improve modeling for canonical IV start values in the epilogue
- // loop.
- using namespace llvm::PatternMatch;
- PHINode *EPResumeVal = &*L->getLoopPreheader()->phis().begin();
- for (Value *Inc : EPResumeVal->incoming_values()) {
- if (match(Inc, m_SpecificInt(0)))
- continue;
- assert(!EPI.VectorTripCount &&
- "Must only have a single non-zero incoming value");
- EPI.VectorTripCount = Inc;
- }
- // If we didn't find a non-zero vector trip count, all incoming values
- // must be zero, which also means the vector trip count is zero. Pick the
- // first zero as vector trip count.
- // TODO: We should not choose VF * UF so the main vector loop is known to
- // be dead.
- if (!EPI.VectorTripCount) {
- assert(
- EPResumeVal->getNumIncomingValues() > 0 &&
- all_of(EPResumeVal->incoming_values(),
- [](Value *Inc) { return match(Inc, m_SpecificInt(0)); }) &&
- "all incoming values must be 0");
- EPI.VectorTripCount = EPResumeVal->getOperand(0);
- }
- VPValue *VPV = Plan.getOrAddLiveIn(EPResumeVal);
- assert(all_of(IV->users(),
- [](const VPUser *U) {
- return isa<VPScalarIVStepsRecipe>(U) ||
- isa<VPDerivedIVRecipe>(U) ||
- cast<VPRecipeBase>(U)->isScalarCast() ||
- cast<VPInstruction>(U)->getOpcode() ==
- Instruction::Add;
- }) &&
- "the canonical IV should only be used by its increment or "
- "ScalarIVSteps when resetting the start value");
- IV->setOperand(0, VPV);
+ VPCanonicalIVPHIRecipe *IV = Plan.getCanonicalIV();
+ // When vectorizing the epilogue loop, the canonical induction start
+ // value needs to be changed from zero to the value after the main
+ // vector loop. Find the resume value created during execution of the main
+ // VPlan. It must be the first phi in the loop preheader.
+ // FIXME: Improve modeling for canonical IV start values in the epilogue
+ // loop.
+ using namespace llvm::PatternMatch;
+ PHINode *EPResumeVal = &*L->getLoopPreheader()->phis().begin();
+ for (Value *Inc : EPResumeVal->incoming_values()) {
+ if (match(Inc, m_SpecificInt(0)))
continue;
- }
+ assert(!EPI.VectorTripCount &&
+ "Must only have a single non-zero incoming value");
+ EPI.VectorTripCount = Inc;
+ }
+ // If we didn't find a non-zero vector trip count, all incoming values
+ // must be zero, which also means the vector trip count is zero. Pick the
+ // first zero as vector trip count.
+ // TODO: We should not choose VF * UF so the main vector loop is known to
+ // be dead.
+ if (!EPI.VectorTripCount) {
+ assert(EPResumeVal->getNumIncomingValues() > 0 &&
+ all_of(EPResumeVal->incoming_values(),
+ [](Value *Inc) { return match(Inc, m_SpecificInt(0)); }) &&
+ "all incoming values must be 0");
+ EPI.VectorTripCount = EPResumeVal->getOperand(0);
+ }
+ VPValue *VPV = Plan.getOrAddLiveIn(EPResumeVal);
+ assert(all_of(IV->users(),
+ [](const VPUser *U) {
+ return isa<VPScalarIVStepsRecipe>(U) ||
+ isa<VPDerivedIVRecipe>(U) ||
+ cast<VPRecipeBase>(U)->isScalarCast() ||
+ cast<VPInstruction>(U)->getOpcode() ==
+ Instruction::Add;
+ }) &&
+ "the canonical IV should only be used by its increment or "
+ "ScalarIVSteps when resetting the start value");
+ IV->setOperand(0, VPV);
+ DenseMap<Value *, Value *> ToFrozen;
+ SmallVector<Instruction *> InstsToMove;
+ for (VPRecipeBase &R : drop_begin(Header->phis())) {
Value *ResumeV = nullptr;
// TODO: Move setting of resume values to prepareToExecute.
if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R)) {
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index f77d587..fedca65 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2241,10 +2241,9 @@ public:
/// TODO: If load combining is allowed in the IR optimizer, this analysis
/// may not be necessary.
bool isLoadCombineCandidate(ArrayRef<Value *> Stores) const;
- bool isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
- ArrayRef<unsigned> Order, const TargetTransformInfo &TTI,
- const DataLayout &DL, ScalarEvolution &SE,
- const int64_t Diff, StridedPtrInfo &SPtrInfo) const;
+ bool isStridedLoad(ArrayRef<Value *> PointerOps, Type *ScalarTy,
+ Align Alignment, const int64_t Diff, Value *Ptr0,
+ Value *PtrN, StridedPtrInfo &SPtrInfo) const;
/// Checks if the given array of loads can be represented as a vectorized,
/// scatter or just simple gather.
@@ -6824,13 +6823,10 @@ isMaskedLoadCompress(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
/// 4. Any pointer operand is an instruction with the users outside of the
/// current graph (for masked gathers extra extractelement instructions
/// might be required).
-bool BoUpSLP::isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
- ArrayRef<unsigned> Order,
- const TargetTransformInfo &TTI,
- const DataLayout &DL, ScalarEvolution &SE,
- const int64_t Diff,
- StridedPtrInfo &SPtrInfo) const {
- const size_t Sz = VL.size();
+bool BoUpSLP::isStridedLoad(ArrayRef<Value *> PointerOps, Type *ScalarTy,
+ Align Alignment, const int64_t Diff, Value *Ptr0,
+ Value *PtrN, StridedPtrInfo &SPtrInfo) const {
+ const size_t Sz = PointerOps.size();
if (Diff % (Sz - 1) != 0)
return false;
@@ -6842,7 +6838,6 @@ bool BoUpSLP::isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
});
const uint64_t AbsoluteDiff = std::abs(Diff);
- Type *ScalarTy = VL.front()->getType();
auto *VecTy = getWidenedType(ScalarTy, Sz);
if (IsAnyPointerUsedOutGraph ||
(AbsoluteDiff > Sz &&
@@ -6853,20 +6848,9 @@ bool BoUpSLP::isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
int64_t Stride = Diff / static_cast<int64_t>(Sz - 1);
if (Diff != Stride * static_cast<int64_t>(Sz - 1))
return false;
- Align Alignment =
- cast<LoadInst>(Order.empty() ? VL.front() : VL[Order.front()])
- ->getAlign();
- if (!TTI.isLegalStridedLoadStore(VecTy, Alignment))
+ if (!TTI->isLegalStridedLoadStore(VecTy, Alignment))
return false;
- Value *Ptr0;
- Value *PtrN;
- if (Order.empty()) {
- Ptr0 = PointerOps.front();
- PtrN = PointerOps.back();
- } else {
- Ptr0 = PointerOps[Order.front()];
- PtrN = PointerOps[Order.back()];
- }
+
// Iterate through all pointers and check if all distances are
// unique multiple of Dist.
SmallSet<int64_t, 4> Dists;
@@ -6875,14 +6859,14 @@ bool BoUpSLP::isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
if (Ptr == PtrN)
Dist = Diff;
else if (Ptr != Ptr0)
- Dist = *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, DL, SE);
+ Dist = *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, *DL, *SE);
// If the strides are not the same or repeated, we can't
// vectorize.
if (((Dist / Stride) * Stride) != Dist || !Dists.insert(Dist).second)
break;
}
if (Dists.size() == Sz) {
- Type *StrideTy = DL.getIndexType(Ptr0->getType());
+ Type *StrideTy = DL->getIndexType(Ptr0->getType());
SPtrInfo.StrideVal = ConstantInt::get(StrideTy, Stride);
SPtrInfo.Ty = getWidenedType(ScalarTy, Sz);
return true;
@@ -6971,7 +6955,11 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
cast<Instruction>(V), UserIgnoreList);
}))
return LoadsState::CompressVectorize;
- if (isStridedLoad(VL, PointerOps, Order, *TTI, *DL, *SE, *Diff, SPtrInfo))
+ Align Alignment =
+ cast<LoadInst>(Order.empty() ? VL.front() : VL[Order.front()])
+ ->getAlign();
+ if (isStridedLoad(PointerOps, ScalarTy, Alignment, *Diff, Ptr0, PtrN,
+ SPtrInfo))
return LoadsState::StridedVectorize;
}
if (!TTI->isLegalMaskedGather(VecTy, CommonAlignment) ||
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index ffd2e59..02eb637 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -978,6 +978,16 @@ void VPlan::execute(VPTransformState *State) {
// If the original loop is unreachable, delete it and all its blocks.
if (!ScalarPhVPBB->hasPredecessors()) {
+ // DeleteDeadBlocks will remove single-entry phis. Remove them from the exit
+ // VPIRBBs in VPlan as well, otherwise we would retain references to deleted
+ // IR instructions.
+ for (VPIRBasicBlock *EB : getExitBlocks()) {
+ for (VPRecipeBase &R : make_early_inc_range(EB->phis())) {
+ if (R.getNumOperands() == 1)
+ R.eraseFromParent();
+ }
+ }
+
Loop *OrigLoop =
State->LI->getLoopFor(getScalarHeader()->getIRBasicBlock());
auto Blocks = OrigLoop->getBlocksVector();