aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
diff options
context:
space:
mode:
authorRoman Lebedev <lebedev.ri@gmail.com>2022-12-12 18:01:57 +0300
committerRoman Lebedev <lebedev.ri@gmail.com>2022-12-12 18:20:03 +0300
commit1bd0b82e508d049efdb07f4f8a342f35818df341 (patch)
treef0507ad21cb78f0c32f9facc71052a929901daca /llvm/lib/Transforms/Utils/SimplifyCFG.cpp
parent647d314eb059b6d2e7c00d7eefe6a7afc37dff25 (diff)
downloadllvm-1bd0b82e508d049efdb07f4f8a342f35818df341.zip
llvm-1bd0b82e508d049efdb07f4f8a342f35818df341.tar.gz
llvm-1bd0b82e508d049efdb07f4f8a342f35818df341.tar.bz2
[SimplifyCFG] `FoldBranchToCommonDest()`: deal with mismatched IV's in PHI's in common successor block
This tries to approach the problem noted by @arsenm: terrible codegen for `__builtin_fpclassify()`: https://godbolt.org/z/388zqdE37 Just because the PHI in the common successor happens to have different incoming values for these two blocks, doesn't mean we have to give up. It's quite easy to deal with this, we just need to produce a select: https://alive2.llvm.org/ce/z/000srb Now, the cost model for this transform is rather overly strict, so this will basically never fire. We tally all (over all preds) the selects needed to the NumBonusInsts Differential Revision: https://reviews.llvm.org/D139275
Diffstat (limited to 'llvm/lib/Transforms/Utils/SimplifyCFG.cpp')
-rw-r--r--llvm/lib/Transforms/Utils/SimplifyCFG.cpp159
1 files changed, 154 insertions, 5 deletions
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index a9b8830..57954bc 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -3567,7 +3567,67 @@ shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI,
return std::nullopt;
}
+struct CompatibleIncomingValue {
+ Value *V;
+ CompatibleIncomingValue() = default;
+ CompatibleIncomingValue(Value *V_) : V(V_) {}
+ operator Value *() const { return V; }
+};
+struct SelectMaterializationRecipe {
+ Value *Cond = nullptr;
+ Value *TrueVal = nullptr;
+ Value *FalseVal = nullptr;
+};
+// Maintain recipes on how to deal with each of the PHI nodes in the common
+// successor block when coming from it's two predecessors, one of which also
+// branches to the other one. For each PHI, we may either have an preexisting
+// Value that we can use when branching from the common predecessor, or we have
+// (an uniqued!) recipe on how to compute such a value via a select.
+// This cache is unique, and is maintained per the predecessor block.
+struct SelectCache {
+ using UniqueSelectIndex = unsigned;
+ SmallDenseMap<SelectMaterializationRecipe, UniqueSelectIndex, 2> SelectCSE;
+ SmallVector<std::variant<CompatibleIncomingValue, UniqueSelectIndex>, 2> Phis;
+};
+
+static bool operator==(const SelectMaterializationRecipe &A,
+ const SelectMaterializationRecipe &B) {
+ return std::tie(A.Cond, A.TrueVal, A.FalseVal) ==
+ std::tie(B.Cond, B.TrueVal, B.FalseVal);
+}
+
+namespace llvm {
+template <> struct DenseMapInfo<SelectMaterializationRecipe> {
+ static SelectMaterializationRecipe getEmptyKey() {
+ SelectMaterializationRecipe R;
+ for (Value **E : {&R.Cond, &R.TrueVal, &R.FalseVal})
+ *E = DenseMapInfo<Value *>::getEmptyKey();
+ return R;
+ }
+
+ static SelectMaterializationRecipe getTombstoneKey() {
+ SelectMaterializationRecipe R;
+ for (Value **E : {&R.Cond, &R.TrueVal, &R.FalseVal})
+ *E = DenseMapInfo<Value *>::getTombstoneKey();
+ return R;
+ }
+
+ static unsigned getHashValue(SelectMaterializationRecipe R) {
+ return static_cast<unsigned>(
+ hash_combine(DenseMapInfo<Value *>::getHashValue(R.Cond),
+ DenseMapInfo<Value *>::getHashValue(R.TrueVal),
+ DenseMapInfo<Value *>::getHashValue(R.FalseVal)));
+ }
+
+ static bool isEqual(SelectMaterializationRecipe LHS,
+ SelectMaterializationRecipe RHS) {
+ return LHS == RHS;
+ }
+};
+} // namespace llvm
+
static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
+ SelectCache *Cache,
DomTreeUpdater *DTU,
MemorySSAUpdater *MSSAU,
const TargetTransformInfo *TTI) {
@@ -3596,6 +3656,7 @@ static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
if (NewCond->hasOneUse() && isa<CmpInst>(NewCond)) {
CmpInst *CI = cast<CmpInst>(NewCond);
CI->setPredicate(CI->getInversePredicate());
+ CI->setName(CI->getName() + ".not");
} else {
NewCond =
Builder.CreateNot(NewCond, PBI->getCondition()->getName() + ".not");
@@ -3603,6 +3664,14 @@ static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
PBI->setCondition(NewCond);
PBI->swapSuccessors();
+
+ if (Cache) {
+ // And likewise, for the select recipes.
+ for (auto &I : Cache->SelectCSE) {
+ I.first.Cond = NewCond;
+ std::swap(I.first.TrueVal, I.first.FalseVal);
+ }
+ }
}
BasicBlock *UniqueSucc =
@@ -3668,6 +3737,45 @@ static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
ValueToValueMapTy VMap; // maps original values to cloned values
CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BB, PredBlock, VMap);
+ assert((bool)Cache == !std::empty(CommonSucc->phis()));
+ if (Cache) {
+ SmallVector<std::pair<SelectMaterializationRecipe, Value *>, 2>
+ UniqueSelects;
+ UniqueSelects.resize(Cache->SelectCSE.size());
+ for (auto I : Cache->SelectCSE)
+ UniqueSelects[I.second].first = I.first;
+
+ for (auto I : zip(CommonSucc->phis(), Cache->Phis)) {
+ PHINode &PN = std::get<0>(I);
+ std::variant<CompatibleIncomingValue, SelectCache::UniqueSelectIndex>
+ &PhiRecipe = std::get<1>(I);
+
+ // Now, we have a recipe on how to deal with this PHI node. Either, there
+ // is an existing Value that we can use as an Incoming Value for the new
+ // predecessor we add, or we have a recipe for a select that will compute
+ // the Value the PHI node would have taken when coming from old BB's.
+ Value **MaterializedSelect;
+ if (auto *V = std::get_if<CompatibleIncomingValue>(&PhiRecipe)) {
+ MaterializedSelect = &V->V;
+ } else {
+ unsigned UniqSelIdx =
+ std::get<SelectCache::UniqueSelectIndex>(PhiRecipe);
+ const SelectMaterializationRecipe &SelRecipe =
+ UniqueSelects[UniqSelIdx].first;
+ MaterializedSelect = &UniqueSelects[UniqSelIdx].second;
+ if (!*MaterializedSelect) {
+ *MaterializedSelect =
+ Builder.CreateSelect(SelRecipe.Cond, SelRecipe.TrueVal,
+ SelRecipe.FalseVal, PN.getName() + ".sel");
+ if (auto *I = dyn_cast<Instruction>(*MaterializedSelect))
+ RemapInstruction(I, VMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+ }
+ }
+ PN.setIncomingValueForBlock(PredBlock, *MaterializedSelect);
+ }
+ }
+
// Now that the Cond was cloned into the predecessor basic block,
// or/and the two conditions together.
Value *BICond = VMap[BI->getCondition()];
@@ -3725,6 +3833,11 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
if (is_contained(successors(BB), BB))
return false;
+ SmallDenseMap<BasicBlock *, SelectCache> PerPredBBCaches;
+
+ unsigned NumSelectsNeeded = 0;
+ bool SawVectorSelect = false;
+
// With which predecessors will we want to deal with?
SmallVector<BasicBlock *, 8> Preds;
for (BasicBlock *PredBlock : predecessors(BB)) {
@@ -3733,7 +3846,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
// Check that we have two conditional branches. If there is a PHI node in
// the common successor, verify that the same value flows in from both
// blocks.
- if (!PBI || PBI->isUnconditional() || !SafeToMergeTerminators(BI, PBI))
+ if (!PBI || PBI->isUnconditional())
continue;
// Determine if the two branches share a common destination.
@@ -3751,13 +3864,45 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
Type *Ty = BI->getCondition()->getType();
InstructionCost Cost = TTI->getArithmeticInstrCost(Opc, Ty, CostKind);
if (InvertPredCond && (!PBI->getCondition()->hasOneUse() ||
- !isa<CmpInst>(PBI->getCondition())))
+ !isa<CmpInst>(PBI->getCondition())))
Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind);
if (Cost > BranchFoldThreshold)
continue;
}
+ SelectCache *Cache = nullptr;
+ if (!std::empty(CommonSucc->phis()))
+ Cache = &PerPredBBCaches.insert({PredBlock, SelectCache()})
+ .first->getSecond();
+ auto getIncomingBlockInCommonSuccessor = [CommonSucc, PredBlock,
+ BB](BasicBlock *SuccOfPredBB) {
+ if (SuccOfPredBB == CommonSucc)
+ return PredBlock;
+ assert(SuccOfPredBB == BB);
+ return BB;
+ };
+ for (PHINode &PN : CommonSucc->phis()) {
+ SelectMaterializationRecipe R;
+ R.Cond = PBI->getCondition();
+ R.TrueVal = PN.getIncomingValueForBlock(
+ getIncomingBlockInCommonSuccessor(PBI->getSuccessor(0)));
+ R.FalseVal = PN.getIncomingValueForBlock(
+ getIncomingBlockInCommonSuccessor(PBI->getSuccessor(1)));
+ if (R.TrueVal == R.FalseVal) {
+ Cache->Phis.emplace_back(R.TrueVal); // CompatibleIncomingValue
+ continue;
+ }
+ // FIXME: can we get better results by using InstSimplify here?
+ auto [iterator, NewUniqueSelect] =
+ Cache->SelectCSE.insert({R, Cache->SelectCSE.size()});
+ Cache->Phis.emplace_back(iterator->second); // UniqueSelectIndex
+ if (!NewUniqueSelect)
+ continue;
+ ++NumSelectsNeeded;
+ SawVectorSelect |= PN.getType()->isVectorTy();
+ }
+
// Ok, we do want to deal with this predecessor. Record it.
Preds.emplace_back(PredBlock);
}
@@ -3773,8 +3918,8 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
// as "bonus instructions", and only allow this transformation when the
// number of the bonus instructions we'll need to create when cloning into
// each predecessor does not exceed a certain threshold.
- unsigned NumBonusInsts = 0;
- bool SawVectorOp = false;
+ unsigned NumBonusInsts = NumSelectsNeeded;
+ bool SawVectorOp = SawVectorSelect;
const unsigned PredCount = Preds.size();
for (Instruction &I : *BB) {
// Don't check the branch condition comparison itself.
@@ -3819,7 +3964,11 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
// Ok, we have the budget. Perform the transformation.
for (BasicBlock *PredBlock : Preds) {
auto *PBI = cast<BranchInst>(PredBlock->getTerminator());
- return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI);
+
+ SelectCache *Cache = nullptr;
+ if (auto it = PerPredBBCaches.find(PredBlock); it != PerPredBBCaches.end())
+ Cache = &it->second;
+ return performBranchToCommonDestFolding(BI, PBI, Cache, DTU, MSSAU, TTI);
}
return false;
}