aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r--llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp178
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp10
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp20
-rw-r--r--llvm/lib/Transforms/Scalar/NewGVN.cpp4
-rw-r--r--llvm/lib/Transforms/Utils/SimplifyCFG.cpp55
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp7
6 files changed, 212 insertions, 62 deletions
diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index ddb95a4..faeab95 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -29,6 +29,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/MemoryProfileInfo.h"
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
@@ -40,6 +41,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/InterleavedRange.h"
+#include "llvm/Support/SHA1.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/CallPromotionUtils.h"
@@ -60,6 +62,9 @@ STATISTIC(FunctionClonesThinBackend,
"Number of function clones created during ThinLTO backend");
STATISTIC(FunctionsClonedThinBackend,
"Number of functions that had clones created during ThinLTO backend");
+STATISTIC(
+ FunctionCloneDuplicatesThinBackend,
+ "Number of function clone duplicates detected during ThinLTO backend");
STATISTIC(AllocTypeNotCold, "Number of not cold static allocations (possibly "
"cloned) during whole program analysis");
STATISTIC(AllocTypeCold, "Number of cold static allocations (possibly cloned) "
@@ -5186,19 +5191,127 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
return Changed;
}
+// Compute a SHA1 hash of the callsite and alloc version information of clone I
+// in the summary, to use in detection of duplicate clones.
+uint64_t ComputeHash(const FunctionSummary *FS, unsigned I) {
+ SHA1 Hasher;
+ // Update hash with any callsites that call non-default (non-zero) callee
+ // versions.
+ for (auto &SN : FS->callsites()) {
+ // In theory all callsites and allocs in this function should have the same
+ // number of clone entries, but handle any discrepancies gracefully below
+ // for NDEBUG builds.
+ assert(
+ SN.Clones.size() > I &&
+ "Callsite summary has fewer entries than other summaries in function");
+ if (SN.Clones.size() <= I || !SN.Clones[I])
+ continue;
+ uint8_t Data[sizeof(SN.Clones[I])];
+ support::endian::write32le(Data, SN.Clones[I]);
+ Hasher.update(Data);
+ }
+ // Update hash with any allocs that have non-default (non-None) hints.
+ for (auto &AN : FS->allocs()) {
+ // In theory all callsites and allocs in this function should have the same
+ // number of clone entries, but handle any discrepancies gracefully below
+ // for NDEBUG builds.
+ assert(AN.Versions.size() > I &&
+ "Alloc summary has fewer entries than other summaries in function");
+ if (AN.Versions.size() <= I ||
+ (AllocationType)AN.Versions[I] == AllocationType::None)
+ continue;
+ Hasher.update(ArrayRef<uint8_t>(&AN.Versions[I], 1));
+ }
+ return support::endian::read64le(Hasher.result().data());
+}
+
static SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> createFunctionClones(
Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE,
std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>>
- &FuncToAliasMap) {
+ &FuncToAliasMap,
+ FunctionSummary *FS) {
+ auto TakeDeclNameAndReplace = [](GlobalValue *DeclGV, GlobalValue *NewGV) {
+ // We might have created this when adjusting callsite in another
+ // function. It should be a declaration.
+ assert(DeclGV->isDeclaration());
+ NewGV->takeName(DeclGV);
+ DeclGV->replaceAllUsesWith(NewGV);
+ DeclGV->eraseFromParent();
+ };
+
+ // Handle aliases to this function, and create analogous alias clones to the
+ // provided clone of this function.
+ auto CloneFuncAliases = [&](Function *NewF, unsigned I) {
+ if (!FuncToAliasMap.count(&F))
+ return;
+ for (auto *A : FuncToAliasMap[&F]) {
+ std::string AliasName = getMemProfFuncName(A->getName(), I);
+ auto *PrevA = M.getNamedAlias(AliasName);
+ auto *NewA = GlobalAlias::create(A->getValueType(),
+ A->getType()->getPointerAddressSpace(),
+ A->getLinkage(), AliasName, NewF);
+ NewA->copyAttributesFrom(A);
+ if (PrevA)
+ TakeDeclNameAndReplace(PrevA, NewA);
+ }
+ };
+
// The first "clone" is the original copy, we should only call this if we
// needed to create new clones.
assert(NumClones > 1);
SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps;
VMaps.reserve(NumClones - 1);
FunctionsClonedThinBackend++;
+
+ // Map of hash of callsite/alloc versions to the instantiated function clone
+ // (possibly the original) implementing those calls. Used to avoid
+ // instantiating duplicate function clones.
+ // FIXME: Ideally the thin link would not generate such duplicate clones to
+ // start with, but right now it happens due to phase ordering in the function
+ // assignment and possible new clones that produces. We simply make each
+ // duplicate an alias to the matching instantiated clone recorded in the map
+ // (except for available_externally which are made declarations as they would
+ // be aliases in the prevailing module, and available_externally aliases are
+ // not well supported right now).
+ DenseMap<uint64_t, Function *> HashToFunc;
+
+ // Save the hash of the original function version.
+ HashToFunc[ComputeHash(FS, 0)] = &F;
+
for (unsigned I = 1; I < NumClones; I++) {
VMaps.emplace_back(std::make_unique<ValueToValueMapTy>());
+ std::string Name = getMemProfFuncName(F.getName(), I);
+ auto Hash = ComputeHash(FS, I);
+ // If this clone would duplicate a previously seen clone, don't generate the
+ // duplicate clone body, just make an alias to satisfy any (potentially
+ // cross-module) references.
+ if (HashToFunc.contains(Hash)) {
+ FunctionCloneDuplicatesThinBackend++;
+ auto *Func = HashToFunc[Hash];
+ if (Func->hasAvailableExternallyLinkage()) {
+ // Skip these as EliminateAvailableExternallyPass does not handle
+ // available_externally aliases correctly and we end up with an
+ // available_externally alias to a declaration. Just create a
+ // declaration for now as we know we will have a definition in another
+ // module.
+ auto Decl = M.getOrInsertFunction(Name, Func->getFunctionType());
+ ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", &F)
+ << "created clone decl " << ore::NV("Decl", Decl.getCallee()));
+ continue;
+ }
+ auto *PrevF = M.getFunction(Name);
+ auto *Alias = GlobalAlias::create(Name, Func);
+ if (PrevF)
+ TakeDeclNameAndReplace(PrevF, Alias);
+ ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", &F)
+ << "created clone alias " << ore::NV("Alias", Alias));
+
+ // Now handle aliases to this function, and clone those as well.
+ CloneFuncAliases(Func, I);
+ continue;
+ }
auto *NewF = CloneFunction(&F, *VMaps.back());
+ HashToFunc[Hash] = NewF;
FunctionClonesThinBackend++;
// Strip memprof and callsite metadata from clone as they are no longer
// needed.
@@ -5208,40 +5321,17 @@ static SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> createFunctionClones(
Inst.setMetadata(LLVMContext::MD_callsite, nullptr);
}
}
- std::string Name = getMemProfFuncName(F.getName(), I);
auto *PrevF = M.getFunction(Name);
- if (PrevF) {
- // We might have created this when adjusting callsite in another
- // function. It should be a declaration.
- assert(PrevF->isDeclaration());
- NewF->takeName(PrevF);
- PrevF->replaceAllUsesWith(NewF);
- PrevF->eraseFromParent();
- } else
+ if (PrevF)
+ TakeDeclNameAndReplace(PrevF, NewF);
+ else
NewF->setName(Name);
updateSubprogramLinkageName(NewF, Name);
ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", &F)
<< "created clone " << ore::NV("NewFunction", NewF));
// Now handle aliases to this function, and clone those as well.
- if (!FuncToAliasMap.count(&F))
- continue;
- for (auto *A : FuncToAliasMap[&F]) {
- std::string Name = getMemProfFuncName(A->getName(), I);
- auto *PrevA = M.getNamedAlias(Name);
- auto *NewA = GlobalAlias::create(A->getValueType(),
- A->getType()->getPointerAddressSpace(),
- A->getLinkage(), Name, NewF);
- NewA->copyAttributesFrom(A);
- if (PrevA) {
- // We might have created this when adjusting callsite in another
- // function. It should be a declaration.
- assert(PrevA->isDeclaration());
- NewA->takeName(PrevA);
- PrevA->replaceAllUsesWith(NewA);
- PrevA->eraseFromParent();
- }
- }
+ CloneFuncAliases(NewF, I);
}
return VMaps;
}
@@ -5401,7 +5491,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps;
bool ClonesCreated = false;
unsigned NumClonesCreated = 0;
- auto CloneFuncIfNeeded = [&](unsigned NumClones) {
+ auto CloneFuncIfNeeded = [&](unsigned NumClones, FunctionSummary *FS) {
// We should at least have version 0 which is the original copy.
assert(NumClones > 0);
// If only one copy needed use original.
@@ -5415,7 +5505,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
assert(NumClonesCreated == NumClones);
return;
}
- VMaps = createFunctionClones(F, NumClones, M, ORE, FuncToAliasMap);
+ VMaps = createFunctionClones(F, NumClones, M, ORE, FuncToAliasMap, FS);
// The first "clone" is the original copy, which doesn't have a VMap.
assert(VMaps.size() == NumClones - 1);
Changed = true;
@@ -5424,9 +5514,9 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
};
auto CloneCallsite = [&](const CallsiteInfo &StackNode, CallBase *CB,
- Function *CalledFunction) {
+ Function *CalledFunction, FunctionSummary *FS) {
// Perform cloning if not yet done.
- CloneFuncIfNeeded(/*NumClones=*/StackNode.Clones.size());
+ CloneFuncIfNeeded(/*NumClones=*/StackNode.Clones.size(), FS);
assert(!isMemProfClone(*CalledFunction));
@@ -5448,6 +5538,10 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
// below.
auto CalleeOrigName = CalledFunction->getName();
for (unsigned J = 0; J < StackNode.Clones.size(); J++) {
+ // If the VMap is empty, this clone was a duplicate of another and was
+ // created as an alias or a declaration.
+ if (J > 0 && VMaps[J - 1]->empty())
+ continue;
// Do nothing if this version calls the original version of its
// callee.
if (!StackNode.Clones[J])
@@ -5591,7 +5685,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
#endif
// Perform cloning if not yet done.
- CloneFuncIfNeeded(/*NumClones=*/AllocNode.Versions.size());
+ CloneFuncIfNeeded(/*NumClones=*/AllocNode.Versions.size(), FS);
OrigAllocsThinBackend++;
AllocVersionsThinBackend += AllocNode.Versions.size();
@@ -5624,6 +5718,10 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
// Update the allocation types per the summary info.
for (unsigned J = 0; J < AllocNode.Versions.size(); J++) {
+ // If the VMap is empty, this clone was a duplicate of another and
+ // was created as an alias or a declaration.
+ if (J > 0 && VMaps[J - 1]->empty())
+ continue;
// Ignore any that didn't get an assigned allocation type.
if (AllocNode.Versions[J] == (uint8_t)AllocationType::None)
continue;
@@ -5670,7 +5768,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
// we don't need to do ICP, but might need to clone this
// function as it is the target of other cloned calls.
if (NumClones)
- CloneFuncIfNeeded(NumClones);
+ CloneFuncIfNeeded(NumClones, FS);
}
else {
@@ -5690,7 +5788,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
}
#endif
- CloneCallsite(StackNode, CB, CalledFunction);
+ CloneCallsite(StackNode, CB, CalledFunction, FS);
}
} else if (CB->isTailCall() && CalledFunction) {
// Locate the synthesized callsite info for the callee VI, if any was
@@ -5700,7 +5798,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
if (CalleeVI && MapTailCallCalleeVIToCallsite.count(CalleeVI)) {
auto Callsite = MapTailCallCalleeVIToCallsite.find(CalleeVI);
assert(Callsite != MapTailCallCalleeVIToCallsite.end());
- CloneCallsite(Callsite->second, CB, CalledFunction);
+ CloneCallsite(Callsite->second, CB, CalledFunction, FS);
}
}
}
@@ -5846,6 +5944,10 @@ void MemProfContextDisambiguation::performICP(
// check.
CallBase *CBClone = CB;
for (unsigned J = 0; J < NumClones; J++) {
+ // If the VMap is empty, this clone was a duplicate of another and was
+ // created as an alias or a declaration.
+ if (J > 0 && VMaps[J - 1]->empty())
+ continue;
// Copy 0 is the original function.
if (J > 0)
CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
@@ -5891,6 +5993,10 @@ void MemProfContextDisambiguation::performICP(
// TotalCount and the number promoted.
CallBase *CBClone = CB;
for (unsigned J = 0; J < NumClones; J++) {
+ // If the VMap is empty, this clone was a duplicate of another and was
+ // created as an alias or a declaration.
+ if (J > 0 && VMaps[J - 1]->empty())
+ continue;
// Copy 0 is the original function.
if (J > 0)
CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index cf6d0ec..e1e24a9 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -318,18 +318,18 @@ Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) {
// * Single constant active lane -> store
// * Narrow width by halfs excluding zero/undef lanes
Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) {
+ Value *StorePtr = II.getArgOperand(1);
+ Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue();
auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
if (!ConstMask)
return nullptr;
// If the mask is all zeros, this instruction does nothing.
- if (ConstMask->isNullValue())
+ if (maskIsAllZeroOrUndef(ConstMask))
return eraseInstFromFunction(II);
// If the mask is all ones, this is a plain vector store of the 1st argument.
- if (ConstMask->isAllOnesValue()) {
- Value *StorePtr = II.getArgOperand(1);
- Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue();
+ if (maskIsAllOneOrUndef(ConstMask)) {
StoreInst *S =
new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
S->copyMetadata(II);
@@ -389,7 +389,7 @@ Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) {
return nullptr;
// If the mask is all zeros, a scatter does nothing.
- if (ConstMask->isNullValue())
+ if (maskIsAllZeroOrUndef(ConstMask))
return eraseInstFromFunction(II);
// Vector splat address -> scalar store
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 87000a1..3df448d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -50,6 +50,9 @@
using namespace llvm;
using namespace PatternMatch;
+namespace llvm {
+extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+}
/// Replace a select operand based on an equality comparison with the identity
/// constant of a binop.
@@ -4492,8 +4495,21 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
auto FoldSelectWithAndOrCond = [&](bool IsAnd, Value *A,
Value *B) -> Instruction * {
if (Value *V = simplifySelectInst(B, TrueVal, FalseVal,
- SQ.getWithInstruction(&SI)))
- return SelectInst::Create(A, IsAnd ? V : TrueVal, IsAnd ? FalseVal : V);
+ SQ.getWithInstruction(&SI))) {
+ Value *NewTrueVal = IsAnd ? V : TrueVal;
+ Value *NewFalseVal = IsAnd ? FalseVal : V;
+
+ // If the True and False values don't change, then preserve the branch
+ // metadata of the original select as the net effect of this change is to
+ // simplify the conditional.
+ Instruction *MDFrom = nullptr;
+ if (NewTrueVal == TrueVal && NewFalseVal == FalseVal &&
+ !ProfcheckDisableMetadataFixes) {
+ MDFrom = &SI;
+ }
+ return SelectInst::Create(A, NewTrueVal, NewFalseVal, "", nullptr,
+ MDFrom);
+ }
// Is (select B, T, F) a SPF?
if (CondVal->hasOneUse() && SelType->isIntOrIntVectorTy()) {
diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp
index 9d4fb79..d6b7633 100644
--- a/llvm/lib/Transforms/Scalar/NewGVN.cpp
+++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -1646,10 +1646,6 @@ NewGVN::performSymbolicPredicateInfoEvaluation(BitCastInst *I) const {
// Evaluate read only and pure calls, and create an expression result.
NewGVN::ExprResult NewGVN::performSymbolicCallEvaluation(Instruction *I) const {
auto *CI = cast<CallInst>(I);
- if (auto *II = dyn_cast<IntrinsicInst>(I)) {
- if (auto *ReturnedValue = II->getReturnedArgOperand())
- return ExprResult::some(createVariableOrConstant(ReturnedValue));
- }
// FIXME: Currently the calls which may access the thread id may
// be considered as not accessing the memory. But this is
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 8bba634..48055ad 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -5152,14 +5152,18 @@ bool SimplifyCFGOpt::simplifyBranchOnICmpChain(BranchInst *BI,
if (ExtraCase && Values.size() < 2)
return false;
- // TODO: Preserve branch weight metadata, similarly to how
- // foldValueComparisonIntoPredecessors preserves it.
+ SmallVector<uint32_t> BranchWeights;
+ const bool HasProfile = !ProfcheckDisableMetadataFixes &&
+ extractBranchWeights(*BI, BranchWeights);
// Figure out which block is which destination.
BasicBlock *DefaultBB = BI->getSuccessor(1);
BasicBlock *EdgeBB = BI->getSuccessor(0);
- if (!TrueWhenEqual)
+ if (!TrueWhenEqual) {
std::swap(DefaultBB, EdgeBB);
+ if (HasProfile)
+ std::swap(BranchWeights[0], BranchWeights[1]);
+ }
BasicBlock *BB = BI->getParent();
@@ -5190,10 +5194,11 @@ bool SimplifyCFGOpt::simplifyBranchOnICmpChain(BranchInst *BI,
if (!isGuaranteedNotToBeUndefOrPoison(ExtraCase, AC, BI, nullptr))
ExtraCase = Builder.CreateFreeze(ExtraCase);
- if (TrueWhenEqual)
- Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB);
- else
- Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);
+ // We don't have any info about this condition.
+ auto *Br = TrueWhenEqual ? Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB)
+ : Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);
+ setExplicitlyUnknownBranchWeightsIfProfiled(*Br, *NewBB->getParent(),
+ DEBUG_TYPE);
OldTI->eraseFromParent();
@@ -5220,6 +5225,17 @@ bool SimplifyCFGOpt::simplifyBranchOnICmpChain(BranchInst *BI,
// Create the new switch instruction now.
SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());
+ if (HasProfile) {
+ // We know the weight of the default case. We don't know the weight of the
+ // other cases, but rather than completely lose profiling info, we split
+ // the remaining probability equally over them.
+ SmallVector<uint32_t> NewWeights(Values.size() + 1);
+ NewWeights[0] = BranchWeights[1]; // this is the default, and we swapped if
+ // TrueWhenEqual.
+ for (auto &V : drop_begin(NewWeights))
+ V = BranchWeights[0] / Values.size();
+ setBranchWeights(*New, NewWeights, /*IsExpected=*/false);
+ }
// Add all of the 'cases' to the switch instruction.
for (ConstantInt *Val : Values)
@@ -7211,6 +7227,7 @@ static bool simplifySwitchLookup(SwitchInst *SI, IRBuilder<> &Builder,
Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest);
BranchInst *RangeCheckBranch = nullptr;
+ BranchInst *CondBranch = nullptr;
Builder.SetInsertPoint(SI);
const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
@@ -7225,6 +7242,7 @@ static bool simplifySwitchLookup(SwitchInst *SI, IRBuilder<> &Builder,
TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize));
RangeCheckBranch =
Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
+ CondBranch = RangeCheckBranch;
if (DTU)
Updates.push_back({DominatorTree::Insert, BB, LookupBB});
}
@@ -7263,7 +7281,7 @@ static bool simplifySwitchLookup(SwitchInst *SI, IRBuilder<> &Builder,
Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted");
Value *LoBit = Builder.CreateTrunc(
Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit");
- Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest());
+ CondBranch = Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest());
if (DTU) {
Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB});
Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()});
@@ -7303,19 +7321,32 @@ static bool simplifySwitchLookup(SwitchInst *SI, IRBuilder<> &Builder,
if (DTU)
Updates.push_back({DominatorTree::Insert, LookupBB, CommonDest});
+ SmallVector<uint32_t> BranchWeights;
+ const bool HasBranchWeights = CondBranch && !ProfcheckDisableMetadataFixes &&
+ extractBranchWeights(*SI, BranchWeights);
+ uint64_t ToLookupWeight = 0;
+ uint64_t ToDefaultWeight = 0;
+
// Remove the switch.
SmallPtrSet<BasicBlock *, 8> RemovedSuccessors;
- for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
- BasicBlock *Succ = SI->getSuccessor(i);
+ for (unsigned I = 0, E = SI->getNumSuccessors(); I < E; ++I) {
+ BasicBlock *Succ = SI->getSuccessor(I);
- if (Succ == SI->getDefaultDest())
+ if (Succ == SI->getDefaultDest()) {
+ if (HasBranchWeights)
+ ToDefaultWeight += BranchWeights[I];
continue;
+ }
Succ->removePredecessor(BB);
if (DTU && RemovedSuccessors.insert(Succ).second)
Updates.push_back({DominatorTree::Delete, BB, Succ});
+ if (HasBranchWeights)
+ ToLookupWeight += BranchWeights[I];
}
SI->eraseFromParent();
-
+ if (HasBranchWeights)
+ setFittedBranchWeights(*CondBranch, {ToLookupWeight, ToDefaultWeight},
+ /*IsExpected=*/false);
if (DTU)
DTU->applyUpdates(Updates);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 43d61f2..a88cffc 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -3298,10 +3298,11 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
UI->getOpcode(), ValTy, Alignment, AS, Ctx.CostKind, OpInfo);
Type *PtrTy = isSingleScalar() ? ScalarPtrTy : toVectorTy(ScalarPtrTy, VF);
-
+ bool UsedByLoadStoreAddress = isUsedByLoadStoreAddress(this);
InstructionCost ScalarCost =
ScalarMemOpCost + Ctx.TTI.getAddressComputationCost(
- PtrTy, &Ctx.SE, nullptr, Ctx.CostKind);
+ PtrTy, UsedByLoadStoreAddress ? nullptr : &Ctx.SE,
+ nullptr, Ctx.CostKind);
if (isSingleScalar())
return ScalarCost;
@@ -3312,7 +3313,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
// vectorized addressing or the loaded value is used as part of an address
// of another load or store.
bool PreferVectorizedAddressing = Ctx.TTI.prefersVectorizedAddressing();
- if (PreferVectorizedAddressing || !isUsedByLoadStoreAddress(this)) {
+ if (PreferVectorizedAddressing || !UsedByLoadStoreAddress) {
bool EfficientVectorLoadStore =
Ctx.TTI.supportsEfficientVectorElementLoadStore();
if (!(IsLoad && !PreferVectorizedAddressing) &&