Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp             |   2
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp             |  42
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineInternal.h            |   2
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp  |   2
-rw-r--r--  llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp         |   6
-rw-r--r--  llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp   |   4
-rw-r--r--  llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp    |   2
-rw-r--r--  llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp           |   4
-rw-r--r--  llvm/lib/Transforms/Instrumentation/MemProfInstrumentation.cpp   |   2
-rw-r--r--  llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp          |   4
-rw-r--r--  llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp     |   2
-rw-r--r--  llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp       |  21
-rw-r--r--  llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp       |   4
-rw-r--r--  llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp  |  11
-rw-r--r--  llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp        |  12
-rw-r--r--  llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp          |   2
-rw-r--r--  llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp            |   2
-rw-r--r--  llvm/lib/Transforms/Utils/PredicateInfo.cpp                      |   2
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp                  |   7
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlan.h                            |  30
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp                   |  22
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp                | 133
22 files changed, 204 insertions, 114 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 8d9933b..92fca90 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3496,7 +3496,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
if (isPowerOf2_64(AlignMask + 1)) {
uint64_t Offset = 0;
match(A, m_Add(m_Value(A), m_ConstantInt(Offset)));
- if (match(A, m_PtrToInt(m_Value(A)))) {
+ if (match(A, m_PtrToIntOrAddr(m_Value(A)))) {
/// Note: this doesn't preserve the offset information but merges
/// offset and alignment.
/// TODO: we can generate a GEP instead of merging the alignment with
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 9b9fe26..614c6eb 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1525,7 +1525,15 @@ Instruction *InstCombinerImpl::visitSExt(SExtInst &Sext) {
}
// Try to extend the entire expression tree to the wide destination type.
- if (shouldChangeType(SrcTy, DestTy) && canEvaluateSExtd(Src, DestTy)) {
+ bool ShouldExtendExpression = true;
+ Value *TruncSrc = nullptr;
+ // It is not desirable to extend the expression in the trunc + sext pattern
+ // when the destination type is narrower than the original (pre-trunc) type.
+ if (match(Src, m_Trunc(m_Value(TruncSrc))))
+ if (TruncSrc->getType()->getScalarSizeInBits() > DestBitSize)
+ ShouldExtendExpression = false;
+ if (ShouldExtendExpression && shouldChangeType(SrcTy, DestTy) &&
+ canEvaluateSExtd(Src, DestTy)) {
// Okay, we can transform this! Insert the new expression now.
LLVM_DEBUG(
dbgs() << "ICE: EvaluateInDifferentType converting expression type"
@@ -1545,13 +1553,18 @@ Instruction *InstCombinerImpl::visitSExt(SExtInst &Sext) {
ShAmt);
}
- Value *X;
- if (match(Src, m_Trunc(m_Value(X)))) {
+ Value *X = TruncSrc;
+ if (X) {
// If the input has more sign bits than bits truncated, then convert
// directly to final type.
unsigned XBitSize = X->getType()->getScalarSizeInBits();
- if (ComputeNumSignBits(X, &Sext) > XBitSize - SrcBitSize)
- return CastInst::CreateIntegerCast(X, DestTy, /* isSigned */ true);
+ bool HasNSW = cast<TruncInst>(Src)->hasNoSignedWrap();
+ if (HasNSW || (ComputeNumSignBits(X, &Sext) > XBitSize - SrcBitSize)) {
+ auto *Res = CastInst::CreateIntegerCast(X, DestTy, /* isSigned */ true);
+ if (auto *ResTrunc = dyn_cast<TruncInst>(Res); ResTrunc && HasNSW)
+ ResTrunc->setHasNoSignedWrap(true);
+ return Res;
+ }
// If input is a trunc from the destination type, then convert into shifts.
if (Src->hasOneUse() && X->getType() == DestTy) {
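The folds in the two hunks above rely on the fact that a no-signed-wrap truncation preserves the signed value, so sext(trunc nsw X) is equivalent to a single signed cast of X to the destination width, even when that width is narrower than X's original type. A standalone plain-C++ sketch of that equivalence (illustrative only, using fixed 8/16/32-bit widths; not LLVM code):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int32_t X = -200; X <= 200; ++X) {
        // "trunc nsw i32 X to i8" holds exactly when the truncation does not
        // change the signed value, i.e. X already fits in i8.
        bool NoSignedWrap = X >= INT8_MIN && X <= INT8_MAX;
        int8_t Trunc = static_cast<int8_t>(X);      // trunc i32 -> i8
        int16_t SExt = static_cast<int16_t>(Trunc); // sext i8 -> i16
        if (NoSignedWrap)
          assert(SExt == static_cast<int16_t>(X));  // == direct signed cast of X
      }
      return 0;
    }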
@@ -2135,7 +2148,7 @@ Instruction *InstCombinerImpl::visitIntToPtr(IntToPtrInst &CI) {
return nullptr;
}
-Value *InstCombinerImpl::foldPtrToIntOfGEP(Type *IntTy, Value *Ptr) {
+Value *InstCombinerImpl::foldPtrToIntOrAddrOfGEP(Type *IntTy, Value *Ptr) {
// Look through chain of one-use GEPs.
Type *PtrTy = Ptr->getType();
SmallVector<GEPOperator *> GEPs;
@@ -2197,7 +2210,7 @@ Instruction *InstCombinerImpl::visitPtrToInt(PtrToIntInst &CI) {
Mask->getType() == Ty)
return BinaryOperator::CreateAnd(Builder.CreatePtrToInt(Ptr, Ty), Mask);
- if (Value *V = foldPtrToIntOfGEP(Ty, SrcOp))
+ if (Value *V = foldPtrToIntOrAddrOfGEP(Ty, SrcOp))
return replaceInstUsesWith(CI, V);
Value *Vec, *Scalar, *Index;
@@ -2215,6 +2228,21 @@ Instruction *InstCombinerImpl::visitPtrToInt(PtrToIntInst &CI) {
}
Instruction *InstCombinerImpl::visitPtrToAddr(PtrToAddrInst &CI) {
+ Value *SrcOp = CI.getPointerOperand();
+ Type *Ty = CI.getType();
+
+ // (ptrtoaddr (ptrmask P, M))
+ // -> (and (ptrtoaddr P), M)
+ // This is generally beneficial as `and` is better supported than `ptrmask`.
+ Value *Ptr, *Mask;
+ if (match(SrcOp, m_OneUse(m_Intrinsic<Intrinsic::ptrmask>(m_Value(Ptr),
+ m_Value(Mask)))) &&
+ Mask->getType() == Ty)
+ return BinaryOperator::CreateAnd(Builder.CreatePtrToAddr(Ptr), Mask);
+
+ if (Value *V = foldPtrToIntOrAddrOfGEP(Ty, SrcOp))
+ return replaceInstUsesWith(CI, V);
+
// FIXME: Implement variants of ptrtoint folds.
return commonCastTransforms(CI);
}
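The new ptrmask fold uses the identity ptrtoaddr(ptrmask(P, M)) == and(ptrtoaddr(P), M). A plain C++ sketch of that identity, with uintptr_t standing in for the pointer's address and an integer round trip modelling llvm.ptrmask (illustrative only, not LLVM code; real ptrtoaddr semantics on non-integral pointers are richer than this):

    #include <cassert>
    #include <cstdint>

    int main() {
      alignas(64) static char Buf[128] = {};
      const uintptr_t Mask = ~static_cast<uintptr_t>(15); // align-down-to-16 mask
      for (unsigned Off = 0; Off != sizeof(Buf); ++Off) {
        char *P = Buf + Off;
        uintptr_t Addr = reinterpret_cast<uintptr_t>(P);            // ptrtoaddr P
        char *Masked = reinterpret_cast<char *>(Addr & Mask);       // models ptrmask(P, Mask)
        uintptr_t ViaPtrMask = reinterpret_cast<uintptr_t>(Masked); // ptrtoaddr(ptrmask(P, Mask))
        uintptr_t ViaAnd = Addr & Mask;                             // and(ptrtoaddr(P), Mask)
        assert(ViaPtrMask == ViaAnd);
      }
      return 0;
    }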
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 9c75d9a..d85e4f7 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -700,7 +700,7 @@ public:
/// folded operation.
void PHIArgMergedDebugLoc(Instruction *Inst, PHINode &PN);
- Value *foldPtrToIntOfGEP(Type *IntTy, Value *Ptr);
+ Value *foldPtrToIntOrAddrOfGEP(Type *IntTy, Value *Ptr);
Instruction *foldGEPICmp(GEPOperator *GEPLHS, Value *RHS, CmpPredicate Cond,
Instruction &I);
Instruction *foldSelectICmp(CmpPredicate Pred, SelectInst *SI, Value *RHS,
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 651e305..550dfc5 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -105,6 +105,8 @@ static Value *simplifyShiftSelectingPackedElement(Instruction *I,
if (~KnownShrBits.Zero != ShlAmt)
return nullptr;
+ IRBuilderBase::InsertPointGuard Guard(IC.Builder);
+ IC.Builder.SetInsertPoint(I);
Value *ShrAmtZ =
IC.Builder.CreateICmpEQ(ShrAmt, Constant::getNullValue(ShrAmt->getType()),
ShrAmt->getName() + ".z");
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index cb6ca72..7c364f8 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -1539,7 +1539,7 @@ void AddressSanitizer::getInterestingMemoryOperands(
IID == Intrinsic::experimental_vp_strided_load) {
Stride = VPI->getOperand(PtrOpNo + 1);
// Use the pointer alignment as the element alignment if the stride is a
- // mutiple of the pointer alignment. Otherwise, the element alignment
+ // multiple of the pointer alignment. Otherwise, the element alignment
// should be Align(1).
unsigned PointerAlign = Alignment.valueOrOne().value();
if (!isa<ConstantInt>(Stride) ||
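The alignment rule in the comment above follows from simple modular arithmetic: if the base pointer is aligned to A and the stride is a multiple of A, every element address base + i*stride is also aligned to A. A small standalone C++ check (illustrative numbers only, not part of the sanitizer):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint64_t PointerAlign = 16;
      const uint64_t Base = 0x1000;  // 16-byte aligned base address
      const uint64_t Stride = 48;    // a multiple of the pointer alignment
      for (uint64_t I = 0; I < 100; ++I)
        assert((Base + I * Stride) % PointerAlign == 0); // every element stays aligned
      // With Stride = 20 (not a multiple of 16), element 1 lands at 0x1014,
      // which is only 4-byte aligned, so the code falls back to Align(1).
      return 0;
    }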
@@ -2399,7 +2399,7 @@ void ModuleAddressSanitizer::instrumentGlobalsELF(
// Putting globals in a comdat changes the semantic and potentially cause
// false negative odr violations at link time. If odr indicators are used, we
- // keep the comdat sections, as link time odr violations will be dectected on
+ // keep the comdat sections, as link time odr violations will be detected on
// the odr indicator symbols.
bool UseComdatForGlobalsGC = UseOdrIndicator && !UniqueModuleId.empty();
@@ -3858,7 +3858,7 @@ void FunctionStackPoisoner::handleDynamicAllocaCall(AllocaInst *AI) {
I->eraseFromParent();
}
- // Replace all uses of AddessReturnedByAlloca with NewAddressPtr.
+ // Replace all uses of AddressReturnedByAlloca with NewAddressPtr.
AI->replaceAllUsesWith(NewAddressPtr);
// We are done. Erase old alloca from parent.
diff --git a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
index 72e8e50..0688bc7 100644
--- a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
+++ b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
@@ -359,7 +359,7 @@ class CHR {
unsigned Count = 0;
// Find out how many times region R is cloned. Note that if the parent
// of R is cloned, R is also cloned, but R's clone count is not updated
- // from the clone of the parent. We need to accumlate all the counts
+ // from the clone of the parent. We need to accumulate all the counts
// from the ancestors to get the clone count.
while (R) {
Count += DuplicationCount[R];
@@ -1513,7 +1513,7 @@ static bool negateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp,
BI->swapSuccessors();
// Don't need to swap this in terms of
// TrueBiasedRegions/FalseBiasedRegions because true-based/false-based
- // mean whehter the branch is likely go into the if-then rather than
+ // mean whether the branch is likely to go into the if-then rather than
// successor0/successor1 and because we can tell which edge is the then or
// the else one by comparing the destination to the region exit block.
continue;
diff --git a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
index cf87e35..1e5946a 100644
--- a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -83,7 +83,7 @@ static cl::opt<unsigned>
// ICP the candidate function even when only a declaration is present.
static cl::opt<bool> ICPAllowDecls(
"icp-allow-decls", cl::init(false), cl::Hidden,
- cl::desc("Promote the target candidate even when the defintion "
+ cl::desc("Promote the target candidate even when the definition "
" is not available"));
// ICP hot candidate functions only. When setting to false, non-cold functions
diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 5e7548b..7795cce 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -139,7 +139,7 @@ cl::opt<bool> ConditionalCounterUpdate(
cl::init(false));
// If the option is not specified, the default behavior about whether
-// counter promotion is done depends on how instrumentaiton lowering
+// counter promotion is done depends on how instrumentation lowering
// pipeline is setup, i.e., the default value of true of this option
// does not mean the promotion will be done by default. Explicitly
// setting this option can override the default behavior.
@@ -1052,7 +1052,7 @@ void InstrLowerer::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
GlobalVariable *Name = Ind->getName();
auto It = ProfileDataMap.find(Name);
assert(It != ProfileDataMap.end() && It->second.DataVar &&
- "value profiling detected in function with no counter incerement");
+ "value profiling detected in function with no counter increment");
GlobalVariable *DataVar = It->second.DataVar;
uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/MemProfInstrumentation.cpp
index 3c0f185..05616d8 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfInstrumentation.cpp
@@ -490,7 +490,7 @@ void createProfileFileNameVar(Module &M) {
}
}
-// Set MemprofHistogramFlag as a Global veriable in IR. This makes it accessible
+// Set MemprofHistogramFlag as a Global variable in IR. This makes it accessible
// to the runtime, changing shadow count behavior.
void createMemprofHistogramFlagVar(Module &M) {
const StringRef VarName(MemProfHistogramFlagVar);
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 10b03bb..471c6ec 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -3136,7 +3136,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/// If we don't instrument it and it gets inlined,
/// our interceptor will not kick in and we will lose the memmove.
/// If we instrument the call here, but it does not get inlined,
- /// we will memove the shadow twice: which is bad in case
+ /// we will memmove the shadow twice: which is bad in case
/// of overlapping regions. So, we simply lower the intrinsic to a call.
///
/// Similar situation exists for memcpy and memset.
@@ -4775,7 +4775,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// _mm_round_ps / _mm_round_ps.
// Similar to maybeHandleSimpleNomemIntrinsic except
- // the second argument is guranteed to be a constant integer.
+ // the second argument is guaranteed to be a constant integer.
void handleRoundPdPsIntrinsic(IntrinsicInst &I) {
assert(I.getArgOperand(0)->getType() == I.getType());
assert(I.arg_size() == 2);
diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
index f5b6686..5f87ed6 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
@@ -176,7 +176,7 @@ PreservedAnalyses PGOCtxProfFlatteningPass::run(Module &M,
assert(areAllBBsReachable(
F, MAM.getResult<FunctionAnalysisManagerModuleProxy>(M)
.getManager()) &&
- "Function has unreacheable basic blocks. The expectation was that "
+ "Function has unreachable basic blocks. The expectation was that "
"DCE was run before.");
auto It = FlattenedProfile.find(AssignGUIDPass::getGUID(F));
diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp b/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
index 0a358d4..de7c169 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
@@ -253,7 +253,7 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) {
Value *RealContext = nullptr;
StructType *ThisContextType = nullptr;
- Value *TheRootFuctionData = nullptr;
+ Value *TheRootFunctionData = nullptr;
Value *ExpectedCalleeTLSAddr = nullptr;
Value *CallsiteInfoTLSAddr = nullptr;
const bool HasMusttail = [&F]() {
@@ -283,7 +283,7 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) {
Guid = Builder.getInt64(
AssignGUIDPass::getGUID(cast<Function>(*Mark->getNameValue())));
// The type of the context of this function is now knowable since we have
- // NumCallsites and NumCounters. We delcare it here because it's more
+ // NumCallsites and NumCounters. We declare it here because it's more
// convenient - we have the Builder.
ThisContextType = StructType::get(
F.getContext(),
@@ -291,28 +291,27 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) {
ArrayType::get(Builder.getPtrTy(), NumCallsites)});
// Figure out which way we obtain the context object for this function -
// if it's an entrypoint, then we call StartCtx, otherwise GetCtx. In the
- // former case, we also set TheRootFuctionData since we need to release it
- // at the end (plus it can be used to know if we have an entrypoint or a
- // regular function)
- // Don't set a name, they end up taking a lot of space and we don't need
- // them.
+ // former case, we also set TheRootFunctionData since we need to release
+ // it at the end (plus it can be used to know if we have an entrypoint or
+ // a regular function). Don't set a name, they end up taking a lot of
+ // space and we don't need them.
// Zero-initialize the FunctionData, except for functions that have
// musttail calls. There, we set the CtxRoot field to 1, which will be
// treated as a "can't be set as root".
- TheRootFuctionData = new GlobalVariable(
+ TheRootFunctionData = new GlobalVariable(
M, FunctionDataTy, false, GlobalVariable::InternalLinkage,
HasMusttail ? CannotBeRootInitializer
: Constant::getNullValue(FunctionDataTy));
if (ContextRootSet.contains(&F)) {
Context = Builder.CreateCall(
- StartCtx, {TheRootFuctionData, Guid, Builder.getInt32(NumCounters),
+ StartCtx, {TheRootFunctionData, Guid, Builder.getInt32(NumCounters),
Builder.getInt32(NumCallsites)});
ORE.emit(
[&] { return OptimizationRemark(DEBUG_TYPE, "Entrypoint", &F); });
} else {
- Context = Builder.CreateCall(GetCtx, {TheRootFuctionData, &F, Guid,
+ Context = Builder.CreateCall(GetCtx, {TheRootFunctionData, &F, Guid,
Builder.getInt32(NumCounters),
Builder.getInt32(NumCallsites)});
ORE.emit([&] {
@@ -399,7 +398,7 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) {
} else if (!HasMusttail && isa<ReturnInst>(I)) {
// Remember to release the context if we are an entrypoint.
IRBuilder<> Builder(&I);
- Builder.CreateCall(ReleaseCtx, {TheRootFuctionData});
+ Builder.CreateCall(ReleaseCtx, {TheRootFunctionData});
ContextWasReleased = true;
}
}
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index 120c4f6..71736cf 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -1957,7 +1957,7 @@ static bool InstrumentAllFunctions(
function_ref<BlockFrequencyInfo *(Function &)> LookupBFI,
function_ref<LoopInfo *(Function &)> LookupLI,
PGOInstrumentationType InstrumentationType) {
- // For the context-sensitve instrumentation, we should have a separated pass
+ // For the context-sensitive instrumentation, we should have a separated pass
// (before LTO/ThinLTO linking) to create these variables.
if (InstrumentationType == PGOInstrumentationType::FDO)
createIRLevelProfileFlagVar(M, InstrumentationType);
@@ -2248,7 +2248,7 @@ static bool annotateAllFunctions(
Func.populateCoverage();
continue;
}
- // When PseudoKind is set to a vaule other than InstrProfRecord::NotPseudo,
+ // When PseudoKind is set to a value other than InstrProfRecord::NotPseudo,
// it means the profile for the function is unrepresentative and this
// function is actually hot / warm. We will reset the function hot / cold
// attribute and drop all the profile counters.
diff --git a/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp b/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp
index 4801ac7..210b126 100644
--- a/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp
+++ b/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp
@@ -481,15 +481,18 @@ StringRef SanitizerBinaryMetadata::getSectionEnd(StringRef SectionSuffix) {
} // namespace
SanitizerBinaryMetadataPass::SanitizerBinaryMetadataPass(
- SanitizerBinaryMetadataOptions Opts, ArrayRef<std::string> IgnorelistFiles)
- : Options(std::move(Opts)), IgnorelistFiles(std::move(IgnorelistFiles)) {}
+ SanitizerBinaryMetadataOptions Opts,
+ IntrusiveRefCntPtr<vfs::FileSystem> VFS,
+ ArrayRef<std::string> IgnorelistFiles)
+ : Options(std::move(Opts)),
+ VFS(VFS ? std::move(VFS) : vfs::getRealFileSystem()),
+ IgnorelistFiles(std::move(IgnorelistFiles)) {}
PreservedAnalyses
SanitizerBinaryMetadataPass::run(Module &M, AnalysisManager<Module> &AM) {
std::unique_ptr<SpecialCaseList> Ignorelist;
if (!IgnorelistFiles.empty()) {
- Ignorelist = SpecialCaseList::createOrDie(IgnorelistFiles,
- *vfs::getRealFileSystem());
+ Ignorelist = SpecialCaseList::createOrDie(IgnorelistFiles, *VFS);
if (Ignorelist->inSection("metadata", "src", M.getSourceFileName()))
return PreservedAnalyses::all();
}
diff --git a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
index b74a070..09abf6a 100644
--- a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
+++ b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
@@ -318,6 +318,18 @@ private:
};
} // namespace
+SanitizerCoveragePass::SanitizerCoveragePass(
+ SanitizerCoverageOptions Options, IntrusiveRefCntPtr<vfs::FileSystem> VFS,
+ const std::vector<std::string> &AllowlistFiles,
+ const std::vector<std::string> &BlocklistFiles)
+ : Options(std::move(Options)),
+ VFS(VFS ? std::move(VFS) : vfs::getRealFileSystem()) {
+ if (AllowlistFiles.size() > 0)
+ Allowlist = SpecialCaseList::createOrDie(AllowlistFiles, *this->VFS);
+ if (BlocklistFiles.size() > 0)
+ Blocklist = SpecialCaseList::createOrDie(BlocklistFiles, *this->VFS);
+}
+
PreservedAnalyses SanitizerCoveragePass::run(Module &M,
ModuleAnalysisManager &MAM) {
auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
diff --git a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index 0d48a35..fd0e9f1 100644
--- a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -355,7 +355,7 @@ static bool isVtableAccess(Instruction *I) {
}
// Do not instrument known races/"benign races" that come from compiler
-// instrumentatin. The user has no way of suppressing them.
+// instrumentation. The user has no way of suppressing them.
static bool shouldInstrumentReadWriteFromAddress(const Module *M, Value *Addr) {
// Peel off GEPs and BitCasts.
Addr = Addr->stripInBoundsOffsets();
diff --git a/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp
index 9471ae3..78d4a57e 100644
--- a/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp
@@ -643,7 +643,7 @@ bool TypeSanitizer::instrumentWithShadowUpdate(
// doesn't match, then we call the runtime (which may yet determine that
// the mismatch is okay).
//
- // The checks generated below have the following strucutre.
+ // The checks generated below have the following structure.
//
// ; First we load the descriptor for the load from shadow memory and
// ; compare it against the type descriptor for the current access type.
diff --git a/llvm/lib/Transforms/Utils/PredicateInfo.cpp b/llvm/lib/Transforms/Utils/PredicateInfo.cpp
index 371d9e6..a9ab3b3 100644
--- a/llvm/lib/Transforms/Utils/PredicateInfo.cpp
+++ b/llvm/lib/Transforms/Utils/PredicateInfo.cpp
@@ -819,7 +819,7 @@ public:
OS << "]";
} else if (const auto *PS = dyn_cast<PredicateSwitch>(PI)) {
OS << "; switch predicate info { CaseValue: " << *PS->CaseValue
- << " Switch:" << *PS->Switch << " Edge: [";
+ << " Edge: [";
PS->From->printAsOperand(OS);
OS << ",";
PS->To->printAsOperand(OS);
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index facb0fa..f7968ab 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7543,12 +7543,13 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
}
if (LoadInst *Load = dyn_cast<LoadInst>(I))
return new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse,
- VPIRMetadata(*Load, LVer), I->getDebugLoc());
+ Load->getAlign(), VPIRMetadata(*Load, LVer),
+ I->getDebugLoc());
StoreInst *Store = cast<StoreInst>(I);
return new VPWidenStoreRecipe(*Store, Ptr, Operands[0], Mask, Consecutive,
- Reverse, VPIRMetadata(*Store, LVer),
- I->getDebugLoc());
+ Reverse, Store->getAlign(),
+ VPIRMetadata(*Store, LVer), I->getDebugLoc());
}
/// Creates a VPWidenIntOrFpInductionRecpipe for \p Phi. If needed, it will also
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 5b9f005..1f10058 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -3179,6 +3179,9 @@ class LLVM_ABI_FOR_TEST VPWidenMemoryRecipe : public VPRecipeBase,
protected:
Instruction &Ingredient;
+ /// Alignment information for this memory access.
+ Align Alignment;
+
/// Whether the accessed addresses are consecutive.
bool Consecutive;
@@ -3198,10 +3201,10 @@ protected:
VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
std::initializer_list<VPValue *> Operands,
- bool Consecutive, bool Reverse,
+ bool Consecutive, bool Reverse, Align Alignment,
const VPIRMetadata &Metadata, DebugLoc DL)
: VPRecipeBase(SC, Operands, DL), VPIRMetadata(Metadata), Ingredient(I),
- Consecutive(Consecutive), Reverse(Reverse) {
+ Alignment(Alignment), Consecutive(Consecutive), Reverse(Reverse) {
assert((Consecutive || !Reverse) && "Reverse implies consecutive");
}
@@ -3242,6 +3245,9 @@ public:
return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
}
+ /// Returns the alignment of the memory access.
+ Align getAlign() const { return Alignment; }
+
/// Generate the wide load/store.
void execute(VPTransformState &State) override {
llvm_unreachable("VPWidenMemoryRecipe should not be instantiated.");
@@ -3259,18 +3265,18 @@ public:
struct LLVM_ABI_FOR_TEST VPWidenLoadRecipe final : public VPWidenMemoryRecipe,
public VPValue {
VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
- bool Consecutive, bool Reverse,
+ bool Consecutive, bool Reverse, Align Alignment,
const VPIRMetadata &Metadata, DebugLoc DL)
: VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive,
- Reverse, Metadata, DL),
+ Reverse, Alignment, Metadata, DL),
VPValue(this, &Load) {
setMask(Mask);
}
VPWidenLoadRecipe *clone() override {
return new VPWidenLoadRecipe(cast<LoadInst>(Ingredient), getAddr(),
- getMask(), Consecutive, Reverse, *this,
- getDebugLoc());
+ getMask(), Consecutive, Reverse, getAlign(),
+ *this, getDebugLoc());
}
VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC);
@@ -3301,8 +3307,8 @@ struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue *Addr, VPValue &EVL,
VPValue *Mask)
: VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L.getIngredient(),
- {Addr, &EVL}, L.isConsecutive(), L.isReverse(), L,
- L.getDebugLoc()),
+ {Addr, &EVL}, L.isConsecutive(), L.isReverse(),
+ L.getAlign(), L, L.getDebugLoc()),
VPValue(this, &getIngredient()) {
setMask(Mask);
}
@@ -3340,16 +3346,16 @@ struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
struct LLVM_ABI_FOR_TEST VPWidenStoreRecipe final : public VPWidenMemoryRecipe {
VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
VPValue *Mask, bool Consecutive, bool Reverse,
- const VPIRMetadata &Metadata, DebugLoc DL)
+ Align Alignment, const VPIRMetadata &Metadata, DebugLoc DL)
: VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal},
- Consecutive, Reverse, Metadata, DL) {
+ Consecutive, Reverse, Alignment, Metadata, DL) {
setMask(Mask);
}
VPWidenStoreRecipe *clone() override {
return new VPWidenStoreRecipe(cast<StoreInst>(Ingredient), getAddr(),
getStoredValue(), getMask(), Consecutive,
- Reverse, *this, getDebugLoc());
+ Reverse, getAlign(), *this, getDebugLoc());
}
VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC);
@@ -3384,7 +3390,7 @@ struct VPWidenStoreEVLRecipe final : public VPWidenMemoryRecipe {
VPValue *Mask)
: VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S.getIngredient(),
{Addr, S.getStoredValue(), &EVL}, S.isConsecutive(),
- S.isReverse(), S, S.getDebugLoc()) {
+ S.isReverse(), S.getAlign(), S, S.getDebugLoc()) {
setMask(Mask);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 931a5b7..9a63c80 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -70,6 +70,7 @@ bool VPRecipeBase::mayWriteToMemory() const {
return cast<VPWidenIntrinsicRecipe>(this)->mayWriteToMemory();
case VPCanonicalIVPHISC:
case VPBranchOnMaskSC:
+ case VPDerivedIVSC:
case VPFirstOrderRecurrencePHISC:
case VPReductionPHISC:
case VPScalarIVStepsSC:
@@ -86,6 +87,7 @@ bool VPRecipeBase::mayWriteToMemory() const {
case VPWidenLoadEVLSC:
case VPWidenLoadSC:
case VPWidenPHISC:
+ case VPWidenPointerInductionSC:
case VPWidenSC:
case VPWidenSelectSC: {
const Instruction *I =
@@ -119,6 +121,7 @@ bool VPRecipeBase::mayReadFromMemory() const {
case VPWidenIntrinsicSC:
return cast<VPWidenIntrinsicRecipe>(this)->mayReadFromMemory();
case VPBranchOnMaskSC:
+ case VPDerivedIVSC:
case VPFirstOrderRecurrencePHISC:
case VPPredInstPHISC:
case VPScalarIVStepsSC:
@@ -134,6 +137,7 @@ bool VPRecipeBase::mayReadFromMemory() const {
case VPWidenGEPSC:
case VPWidenIntOrFpInductionSC:
case VPWidenPHISC:
+ case VPWidenPointerInductionSC:
case VPWidenSC:
case VPWidenSelectSC: {
const Instruction *I =
@@ -3358,7 +3362,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
Type *ValTy = Ctx.Types.inferScalarType(IsLoad ? this : getOperand(0));
Type *ScalarPtrTy = Ctx.Types.inferScalarType(PtrOp);
const Align Alignment = getLoadStoreAlignment(UI);
- unsigned AS = getLoadStoreAddressSpace(UI);
+ unsigned AS = cast<PointerType>(ScalarPtrTy)->getAddressSpace();
TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(UI->getOperand(0));
InstructionCost ScalarMemOpCost = Ctx.TTI.getMemoryOpCost(
UI->getOpcode(), ValTy, Alignment, AS, Ctx.CostKind, OpInfo);
@@ -3525,7 +3529,6 @@ void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF,
VPCostContext &Ctx) const {
Type *Ty = toVectorTy(getLoadStoreType(&Ingredient), VF);
- const Align Alignment = getLoadStoreAlignment(&Ingredient);
unsigned AS = cast<PointerType>(Ctx.Types.inferScalarType(getAddr()))
->getAddressSpace();
unsigned Opcode = isa<VPWidenLoadRecipe, VPWidenLoadEVLRecipe>(this)
@@ -3575,7 +3578,6 @@ InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF,
void VPWidenLoadRecipe::execute(VPTransformState &State) {
Type *ScalarDataTy = getLoadStoreType(&Ingredient);
auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
- const Align Alignment = getLoadStoreAlignment(&Ingredient);
bool CreateGather = !isConsecutive();
auto &Builder = State.Builder;
@@ -3630,7 +3632,6 @@ static Instruction *createReverseEVL(IRBuilderBase &Builder, Value *Operand,
void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
Type *ScalarDataTy = getLoadStoreType(&Ingredient);
auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
- const Align Alignment = getLoadStoreAlignment(&Ingredient);
bool CreateGather = !isConsecutive();
auto &Builder = State.Builder;
@@ -3674,8 +3675,8 @@ InstructionCost VPWidenLoadEVLRecipe::computeCost(ElementCount VF,
// TODO: Using getMemoryOpCost() instead of getMaskedMemoryOpCost when we
// don't need to compare to the legacy cost model.
Type *Ty = toVectorTy(getLoadStoreType(&Ingredient), VF);
- const Align Alignment = getLoadStoreAlignment(&Ingredient);
- unsigned AS = getLoadStoreAddressSpace(&Ingredient);
+ unsigned AS = cast<PointerType>(Ctx.Types.inferScalarType(getAddr()))
+ ->getAddressSpace();
InstructionCost Cost = Ctx.TTI.getMaskedMemoryOpCost(
Instruction::Load, Ty, Alignment, AS, Ctx.CostKind);
if (!Reverse)
@@ -3699,7 +3700,6 @@ void VPWidenLoadEVLRecipe::print(raw_ostream &O, const Twine &Indent,
void VPWidenStoreRecipe::execute(VPTransformState &State) {
VPValue *StoredVPValue = getStoredValue();
bool CreateScatter = !isConsecutive();
- const Align Alignment = getLoadStoreAlignment(&Ingredient);
auto &Builder = State.Builder;
@@ -3742,7 +3742,6 @@ void VPWidenStoreRecipe::print(raw_ostream &O, const Twine &Indent,
void VPWidenStoreEVLRecipe::execute(VPTransformState &State) {
VPValue *StoredValue = getStoredValue();
bool CreateScatter = !isConsecutive();
- const Align Alignment = getLoadStoreAlignment(&Ingredient);
auto &Builder = State.Builder;
@@ -3785,8 +3784,8 @@ InstructionCost VPWidenStoreEVLRecipe::computeCost(ElementCount VF,
// TODO: Using getMemoryOpCost() instead of getMaskedMemoryOpCost when we
// don't need to compare to the legacy cost model.
Type *Ty = toVectorTy(getLoadStoreType(&Ingredient), VF);
- const Align Alignment = getLoadStoreAlignment(&Ingredient);
- unsigned AS = getLoadStoreAddressSpace(&Ingredient);
+ unsigned AS = cast<PointerType>(Ctx.Types.inferScalarType(getAddr()))
+ ->getAddressSpace();
InstructionCost Cost = Ctx.TTI.getMaskedMemoryOpCost(
Instruction::Store, Ty, Alignment, AS, Ctx.CostKind);
if (!Reverse)
@@ -4252,7 +4251,8 @@ InstructionCost VPInterleaveBase::computeCost(ElementCount VF,
getNumDefinedValues() > 0 ? getVPValue(InsertPosIdx)
: getStoredValues()[InsertPosIdx]);
auto *VectorTy = cast<VectorType>(toVectorTy(ValTy, VF));
- unsigned AS = getLoadStoreAddressSpace(InsertPos);
+ unsigned AS = cast<PointerType>(Ctx.Types.inferScalarType(getAddr()))
+ ->getAddressSpace();
unsigned InterleaveFactor = IG->getFactor();
auto *WideVecTy = VectorType::get(ValTy, VF * InterleaveFactor);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 84817d7..d9ac26bb 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -91,13 +91,14 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
NewRecipe = new VPWidenLoadRecipe(
*Load, Ingredient.getOperand(0), nullptr /*Mask*/,
- false /*Consecutive*/, false /*Reverse*/, VPIRMetadata(*Load),
- Ingredient.getDebugLoc());
+ false /*Consecutive*/, false /*Reverse*/, Load->getAlign(),
+ VPIRMetadata(*Load), Ingredient.getDebugLoc());
} else if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
NewRecipe = new VPWidenStoreRecipe(
*Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/,
- VPIRMetadata(*Store), Ingredient.getDebugLoc());
+ Store->getAlign(), VPIRMetadata(*Store),
+ Ingredient.getDebugLoc());
} else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
NewRecipe = new VPWidenGEPRecipe(GEP, Ingredient.operands());
} else if (CallInst *CI = dyn_cast<CallInst>(Inst)) {
@@ -130,6 +131,24 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
return true;
}
+/// Return true if we do not know how to (mechanically) hoist or sink \p R out
+/// of a loop region.
+static bool cannotHoistOrSinkRecipe(const VPRecipeBase &R) {
+ // Assumes don't alias anything or throw; as long as they're guaranteed to
+ // execute, they're safe to hoist.
+ if (match(&R, m_Intrinsic<Intrinsic::assume>()))
+ return false;
+
+ // TODO: Relax checks in the future, e.g. we could also hoist reads, if their
+ // memory location is not modified in the vector loop.
+ if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi())
+ return true;
+
+ // Allocas cannot be hoisted.
+ auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
+ return RepR && RepR->getOpcode() == Instruction::Alloca;
+}
+
static bool sinkScalarOperands(VPlan &Plan) {
auto Iter = vp_depth_first_deep(Plan.getEntry());
bool Changed = false;
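The read-from-memory restriction in cannotHoistOrSinkRecipe exists because a load is only safe to move out of the loop when nothing in the loop writes its location; the TODO above notes this could be relaxed with that extra analysis. A plain C++ sketch of the hazard (illustrative only, not VPlan code):

    #include <cassert>

    int main() {
      int Flag = 0;
      int SumInLoop = 0;
      for (int I = 0; I < 4; ++I) {
        if (I == 2)
          Flag = 1;        // the loop writes the location...
        SumInLoop += Flag; // ...so this read is not loop-invariant
      }

      int Flag2 = 0;
      int Hoisted = Flag2; // the same read, hoisted above the loop
      int SumHoisted = 0;
      for (int I = 0; I < 4; ++I) {
        if (I == 2)
          Flag2 = 1;       // the write still happens inside the loop
        SumHoisted += Hoisted;
      }
      assert(SumInLoop == 2 && SumHoisted == 0); // hoisting the read changed the result
      return 0;
    }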
@@ -1825,7 +1844,7 @@ sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR,
VPDT.properlyDominates(Previous, SinkCandidate))
return true;
- if (SinkCandidate->mayHaveSideEffects())
+ if (cannotHoistOrSinkRecipe(*SinkCandidate))
return false;
WorkList.push_back(SinkCandidate);
@@ -1865,7 +1884,7 @@ sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR,
static bool hoistPreviousBeforeFORUsers(VPFirstOrderRecurrencePHIRecipe *FOR,
VPRecipeBase *Previous,
VPDominatorTree &VPDT) {
- if (Previous->mayHaveSideEffects() || Previous->mayReadFromMemory())
+ if (cannotHoistOrSinkRecipe(*Previous))
return false;
// Collect recipes that need hoisting.
@@ -1912,11 +1931,6 @@ static bool hoistPreviousBeforeFORUsers(VPFirstOrderRecurrencePHIRecipe *FOR,
return nullptr;
return HoistCandidate;
};
- auto CanHoist = [&](VPRecipeBase *HoistCandidate) {
- // Avoid hoisting candidates with side-effects, as we do not yet analyze
- // associated dependencies.
- return !HoistCandidate->mayHaveSideEffects();
- };
if (!NeedsHoisting(Previous->getVPSingleValue()))
return true;
@@ -1928,7 +1942,7 @@ static bool hoistPreviousBeforeFORUsers(VPFirstOrderRecurrencePHIRecipe *FOR,
VPRecipeBase *Current = HoistCandidates[I];
assert(Current->getNumDefinedValues() == 1 &&
"only recipes with a single defined value expected");
- if (!CanHoist(Current))
+ if (cannotHoistOrSinkRecipe(*Current))
return false;
for (VPValue *Op : Current->operands()) {
@@ -2143,24 +2157,6 @@ void VPlanTransforms::cse(VPlan &Plan) {
static void licm(VPlan &Plan) {
VPBasicBlock *Preheader = Plan.getVectorPreheader();
- // Return true if we do not know how to (mechanically) hoist a given recipe
- // out of a loop region.
- auto CannotHoistRecipe = [](VPRecipeBase &R) {
- // Assumes don't alias anything or throw; as long as they're guaranteed to
- // execute, they're safe to hoist.
- if (match(&R, m_Intrinsic<Intrinsic::assume>()))
- return false;
-
- // TODO: Relax checks in the future, e.g. we could also hoist reads, if
- // their memory location is not modified in the vector loop.
- if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi())
- return true;
-
- // Allocas cannot be hoisted.
- auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
- return RepR && RepR->getOpcode() == Instruction::Alloca;
- };
-
// Hoist any loop invariant recipes from the vector loop region to the
// preheader. Preform a shallow traversal of the vector loop region, to
// exclude recipes in replicate regions. Since the top-level blocks in the
@@ -2172,7 +2168,7 @@ static void licm(VPlan &Plan) {
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_shallow(LoopRegion->getEntry()))) {
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
- if (CannotHoistRecipe(R))
+ if (cannotHoistOrSinkRecipe(R))
continue;
if (any_of(R.operands(), [](VPValue *Op) {
return !Op->isDefinedOutsideLoopRegions();
@@ -3652,6 +3648,37 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
Sub = VecOp->getDefiningRecipe();
VecOp = Tmp;
}
+
+ // If ValB is a constant and can be safely extended, truncate it to the same
+ // type as ExtA's operand, then extend it to the same type as ExtA. This
+ // creates two uniform extends that can more easily be matched by the rest of
+ // the bundling code. The ExtB reference, ValB and operand 1 of Mul are all
+ // replaced with the new extend of the constant.
+ auto ExtendAndReplaceConstantOp = [&Ctx](VPWidenCastRecipe *ExtA,
+ VPWidenCastRecipe *&ExtB,
+ VPValue *&ValB, VPWidenRecipe *Mul) {
+ if (!ExtA || ExtB || !ValB->isLiveIn())
+ return;
+ Type *NarrowTy = Ctx.Types.inferScalarType(ExtA->getOperand(0));
+ Instruction::CastOps ExtOpc = ExtA->getOpcode();
+ const APInt *Const;
+ if (!match(ValB, m_APInt(Const)) ||
+ !llvm::canConstantBeExtended(
+ Const, NarrowTy, TTI::getPartialReductionExtendKind(ExtOpc)))
+ return;
+ // The truncate ensures that the type of each extended operand is the
+ // same, and it's been proven that the constant can be extended from
+ // NarrowTy safely. Necessary since ExtA's extended operand would be
+ // e.g. an i8, while the const will likely be an i32. This will be
+ // elided by later optimisations.
+ VPBuilder Builder(Mul);
+ auto *Trunc =
+ Builder.createWidenCast(Instruction::CastOps::Trunc, ValB, NarrowTy);
+ Type *WideTy = Ctx.Types.inferScalarType(ExtA);
+ ValB = ExtB = Builder.createWidenCast(ExtOpc, Trunc, WideTy);
+ Mul->setOperand(1, ExtB);
+ };
+
// Try to match reduce.add(mul(...)).
if (match(VecOp, m_Mul(m_VPValue(A), m_VPValue(B)))) {
auto *RecipeA =
@@ -3660,6 +3687,9 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
dyn_cast_if_present<VPWidenCastRecipe>(B->getDefiningRecipe());
auto *Mul = cast<VPWidenRecipe>(VecOp->getDefiningRecipe());
+ // Convert reduce.add(mul(ext, const)) to reduce.add(mul(ext, ext(const)))
+ ExtendAndReplaceConstantOp(RecipeA, RecipeB, B, Mul);
+
// Match reduce.add/sub(mul(ext, ext)).
if (RecipeA && RecipeB && match(RecipeA, m_ZExtOrSExt(m_VPValue())) &&
match(RecipeB, m_ZExtOrSExt(m_VPValue())) &&
@@ -3669,7 +3699,6 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
cast<VPWidenRecipe>(Sub), Red);
return new VPExpressionRecipe(RecipeA, RecipeB, Mul, Red);
}
- // Match reduce.add(mul).
// TODO: Add an expression type for this variant with a negated mul
if (!Sub && IsMulAccValidAndClampRange(Mul, nullptr, nullptr, nullptr))
return new VPExpressionRecipe(Mul, Red);
@@ -3678,18 +3707,26 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
// variants.
if (Sub)
return nullptr;
- // Match reduce.add(ext(mul(ext(A), ext(B)))).
- // All extend recipes must have same opcode or A == B
- // which can be transform to reduce.add(zext(mul(sext(A), sext(B)))).
- if (match(VecOp, m_ZExtOrSExt(m_Mul(m_ZExtOrSExt(m_VPValue()),
- m_ZExtOrSExt(m_VPValue()))))) {
+
+ // Match reduce.add(ext(mul(A, B))).
+ if (match(VecOp, m_ZExtOrSExt(m_Mul(m_VPValue(A), m_VPValue(B))))) {
auto *Ext = cast<VPWidenCastRecipe>(VecOp->getDefiningRecipe());
auto *Mul = cast<VPWidenRecipe>(Ext->getOperand(0)->getDefiningRecipe());
- auto *Ext0 =
- cast<VPWidenCastRecipe>(Mul->getOperand(0)->getDefiningRecipe());
- auto *Ext1 =
- cast<VPWidenCastRecipe>(Mul->getOperand(1)->getDefiningRecipe());
- if ((Ext->getOpcode() == Ext0->getOpcode() || Ext0 == Ext1) &&
+ auto *Ext0 = dyn_cast_if_present<VPWidenCastRecipe>(A->getDefiningRecipe());
+ auto *Ext1 = dyn_cast_if_present<VPWidenCastRecipe>(B->getDefiningRecipe());
+
+ // reduce.add(ext(mul(ext, const)))
+ // -> reduce.add(ext(mul(ext, ext(const))))
+ ExtendAndReplaceConstantOp(Ext0, Ext1, B, Mul);
+
+ // reduce.add(ext(mul(ext(A), ext(B))))
+ // -> reduce.add(mul(wider_ext(A), wider_ext(B)))
+ // The inner extends must either have the same opcode as the outer extend or
+ // be the same, in which case the multiply can never result in a negative
+ // value and the outer extend can be folded away by doing wider
+ // extends for the operands of the mul.
+ if (Ext0 && Ext1 &&
+ (Ext->getOpcode() == Ext0->getOpcode() || Ext0 == Ext1) &&
Ext0->getOpcode() == Ext1->getOpcode() &&
IsMulAccValidAndClampRange(Mul, Ext0, Ext1, Ext) && Mul->hasOneUse()) {
auto *NewExt0 = new VPWidenCastRecipe(
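The opcode condition above reflects two arithmetic facts: the product of two values extended from a narrow type always fits in the doubled width, so the outer extend adds no information and can be replaced by wider inner extends; and when both inner extends are the same recipe (Ext0 == Ext1) the product is a square and therefore non-negative, so a zero-extending outer cast over a signed multiply is also safe. A plain C++ sketch with i8/i16/i32 stand-ins (illustrative only, not VPlan code):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int A = -128; A <= 127; ++A) {
        for (int B = -128; B <= 127; ++B) {
          // sext(mul(sext(A), sext(B))): multiply in i16, then extend to i32.
          int16_t Mul16 =
              static_cast<int16_t>(static_cast<int16_t>(A) * static_cast<int16_t>(B));
          // mul(wider_ext(A), wider_ext(B)): extend straight to i32 and multiply.
          int32_t Wide = A * B;
          assert(static_cast<int32_t>(Mul16) == Wide);
        }
        // Ext0 == Ext1: a square is never negative, so even a zero-extending
        // outer cast of the i16 product matches the wide multiply.
        int16_t Sq16 =
            static_cast<int16_t>(static_cast<int16_t>(A) * static_cast<int16_t>(A));
        assert(static_cast<int32_t>(static_cast<uint16_t>(Sq16)) == A * A);
      }
      return 0;
    }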
@@ -4234,10 +4271,11 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
if (auto *LoadGroup = dyn_cast<VPInterleaveRecipe>(R)) {
// Narrow interleave group to wide load, as transformed VPlan will only
// process one original iteration.
+ auto *LI =
+ cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos());
auto *L = new VPWidenLoadRecipe(
- *cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos()),
- LoadGroup->getAddr(), LoadGroup->getMask(), /*Consecutive=*/true,
- /*Reverse=*/false, {}, LoadGroup->getDebugLoc());
+ *LI, LoadGroup->getAddr(), LoadGroup->getMask(), /*Consecutive=*/true,
+ /*Reverse=*/false, LI->getAlign(), {}, LoadGroup->getDebugLoc());
L->insertBefore(LoadGroup);
NarrowedOps.insert(L);
return L;
@@ -4280,10 +4318,11 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
Res = NarrowOp(Member0);
}
+ auto *SI =
+ cast<StoreInst>(StoreGroup->getInterleaveGroup()->getInsertPos());
auto *S = new VPWidenStoreRecipe(
- *cast<StoreInst>(StoreGroup->getInterleaveGroup()->getInsertPos()),
- StoreGroup->getAddr(), Res, nullptr, /*Consecutive=*/true,
- /*Reverse=*/false, {}, StoreGroup->getDebugLoc());
+ *SI, StoreGroup->getAddr(), Res, nullptr, /*Consecutive=*/true,
+ /*Reverse=*/false, SI->getAlign(), {}, StoreGroup->getDebugLoc());
S->insertBefore(StoreGroup);
StoreGroup->eraseFromParent();
}