aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r--llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp3
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineInternal.h2
-rw-r--r--llvm/lib/Transforms/InstCombine/InstructionCombining.cpp6
-rw-r--r--llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp40
-rw-r--r--llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp12
-rw-r--r--llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp9
-rw-r--r--llvm/lib/Transforms/Utils/BuildLibCalls.cpp6
-rw-r--r--llvm/lib/Transforms/Utils/DeclareRuntimeLibcalls.cpp4
-rw-r--r--llvm/lib/Transforms/Utils/LoopVersioning.cpp9
-rw-r--r--llvm/lib/Transforms/Utils/SimplifyCFG.cpp24
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlan.cpp11
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlan.h4
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp13
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp119
14 files changed, 159 insertions, 103 deletions
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 7a95df4..b575d76 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -1378,8 +1378,7 @@ static bool foldMemChr(CallInst *Call, DomTreeUpdater *DTU,
IRB.CreateTrunc(Call->getArgOperand(1), ByteTy), BBNext, N);
// We can't know the precise weights here, as they would depend on the value
// distribution of Call->getArgOperand(1). So we just mark it as "unknown".
- setExplicitlyUnknownBranchWeightsIfProfiled(*SI, *Call->getFunction(),
- DEBUG_TYPE);
+ setExplicitlyUnknownBranchWeightsIfProfiled(*SI, DEBUG_TYPE);
Type *IndexTy = DL.getIndexType(Call->getType());
SmallVector<DominatorTree::UpdateType, 8> Updates;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index d85e4f7..9bdd8cb 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -479,7 +479,7 @@ private:
const Twine &NameStr = "",
InsertPosition InsertBefore = nullptr) {
auto *Sel = SelectInst::Create(C, S1, S2, NameStr, InsertBefore, nullptr);
- setExplicitlyUnknownBranchWeightsIfProfiled(*Sel, F, DEBUG_TYPE);
+ setExplicitlyUnknownBranchWeightsIfProfiled(*Sel, DEBUG_TYPE, &F);
return Sel;
}
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 67f837c..b158e0f 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2261,11 +2261,11 @@ Instruction *InstCombinerImpl::foldBinopWithPhiOperands(BinaryOperator &BO) {
}
Instruction *InstCombinerImpl::foldBinOpIntoSelectOrPhi(BinaryOperator &I) {
- if (!isa<Constant>(I.getOperand(1)))
- return nullptr;
+ bool IsOtherParamConst = isa<Constant>(I.getOperand(1));
if (auto *Sel = dyn_cast<SelectInst>(I.getOperand(0))) {
- if (Instruction *NewSel = FoldOpIntoSelect(I, Sel))
+ if (Instruction *NewSel =
+ FoldOpIntoSelect(I, Sel, false, !IsOtherParamConst))
return NewSel;
} else if (auto *PN = dyn_cast<PHINode>(I.getOperand(0))) {
if (Instruction *NewPhi = foldOpIntoPhi(I, PN))
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 019536ca..9070d25 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -72,6 +72,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ProfDataUtils.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
@@ -105,6 +106,7 @@ STATISTIC(
STATISTIC(NumShiftUntilZero,
"Number of uncountable loops recognized as 'shift until zero' idiom");
+namespace llvm {
bool DisableLIRP::All;
static cl::opt<bool, true>
DisableLIRPAll("disable-" DEBUG_TYPE "-all",
@@ -163,6 +165,10 @@ static cl::opt<bool> ForceMemsetPatternIntrinsic(
cl::desc("Use memset.pattern intrinsic whenever possible"), cl::init(false),
cl::Hidden);
+extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+
+} // namespace llvm
+
namespace {
class LoopIdiomRecognize {
@@ -3199,7 +3205,21 @@ bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
// The loop trip count check.
auto *IVCheck = Builder.CreateICmpEQ(IVNext, LoopTripCount,
CurLoop->getName() + ".ivcheck");
- Builder.CreateCondBr(IVCheck, SuccessorBB, LoopHeaderBB);
+ SmallVector<uint32_t> BranchWeights;
+ const bool HasBranchWeights =
+ !ProfcheckDisableMetadataFixes &&
+ extractBranchWeights(*LoopHeaderBB->getTerminator(), BranchWeights);
+
+ auto *BI = Builder.CreateCondBr(IVCheck, SuccessorBB, LoopHeaderBB);
+ if (HasBranchWeights) {
+ if (SuccessorBB == LoopHeaderBB->getTerminator()->getSuccessor(1))
+ std::swap(BranchWeights[0], BranchWeights[1]);
+ // We're not changing the loop profile, so we can reuse the original loop's
+ // profile.
+ setBranchWeights(*BI, BranchWeights,
+ /*IsExpected=*/false);
+ }
+
LoopHeaderBB->getTerminator()->eraseFromParent();
// Populate the IV PHI.
@@ -3368,10 +3388,10 @@ static bool detectShiftUntilZeroIdiom(Loop *CurLoop, ScalarEvolution *SE,
/// %start = <...>
/// %extraoffset = <...>
/// <...>
-/// br label %for.cond
+/// br label %loop
///
/// loop:
-/// %iv = phi i8 [ %start, %entry ], [ %iv.next, %for.cond ]
+/// %iv = phi i8 [ %start, %entry ], [ %iv.next, %loop ]
/// %nbits = add nsw i8 %iv, %extraoffset
/// %val.shifted = {{l,a}shr,shl} i8 %val, %nbits
/// %val.shifted.iszero = icmp eq i8 %val.shifted, 0
@@ -3533,7 +3553,19 @@ bool LoopIdiomRecognize::recognizeShiftUntilZero() {
// The loop terminator.
Builder.SetInsertPoint(LoopHeaderBB->getTerminator());
- Builder.CreateCondBr(CIVCheck, SuccessorBB, LoopHeaderBB);
+ SmallVector<uint32_t> BranchWeights;
+ const bool HasBranchWeights =
+ !ProfcheckDisableMetadataFixes &&
+ extractBranchWeights(*LoopHeaderBB->getTerminator(), BranchWeights);
+
+ auto *BI = Builder.CreateCondBr(CIVCheck, SuccessorBB, LoopHeaderBB);
+ if (HasBranchWeights) {
+ if (InvertedCond)
+ std::swap(BranchWeights[0], BranchWeights[1]);
+ // We're not changing the loop profile, so we can reuse the original loop's
+ // profile.
+ setBranchWeights(*BI, BranchWeights, /*IsExpected=*/false);
+ }
LoopHeaderBB->getTerminator()->eraseFromParent();
// Populate the IV PHI.
diff --git a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
index b9546c5..e902b71 100644
--- a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
@@ -24,6 +24,7 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/ProfDataUtils.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
@@ -393,6 +394,17 @@ private:
DTUpdates.push_back({DominatorTree::Insert, Preheader, BB});
++NumLoopExitsDeleted;
}
+ // We don't really need to add branch weights to DummySwitch, because all
+ // but one branches are just a temporary artifact - see the comment on top
+ // of this function. But, it's easy to estimate the weights, and it helps
+ // maintain a property of the overall compiler - that the branch weights
+ // don't "just get dropped" accidentally (i.e. profcheck)
+ if (DummySwitch->getParent()->getParent()->hasProfileData()) {
+ SmallVector<uint32_t> DummyBranchWeights(1 + DummySwitch->getNumCases());
+ // default. 100% probability, the rest are dead.
+ DummyBranchWeights[0] = 1;
+ setBranchWeights(*DummySwitch, DummyBranchWeights, /*IsExpected=*/false);
+ }
assert(L.getLoopPreheader() == NewPreheader && "Malformed CFG?");
if (Loop *OuterLoop = LI.getLoopFor(Preheader)) {
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 0577ddb..0f3e664 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -330,8 +330,7 @@ static void buildPartialUnswitchConditionalBranch(
HasBranchWeights ? ComputeProfFrom.getMetadata(LLVMContext::MD_prof)
: nullptr);
if (!HasBranchWeights)
- setExplicitlyUnknownBranchWeightsIfProfiled(
- *BR, *BR->getParent()->getParent(), DEBUG_TYPE);
+ setExplicitlyUnknownBranchWeightsIfProfiled(*BR, DEBUG_TYPE);
}
/// Copy a set of loop invariant values, and conditionally branch on them.
@@ -389,8 +388,7 @@ static void buildPartialInvariantUnswitchConditionalBranch(
IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
Direction ? &NormalSucc : &UnswitchedSucc, ProfData);
if (!ProfData)
- setExplicitlyUnknownBranchWeightsIfProfiled(*BR, *BR->getFunction(),
- DEBUG_TYPE);
+ setExplicitlyUnknownBranchWeightsIfProfiled(*BR, DEBUG_TYPE);
}
/// Rewrite the PHI nodes in an unswitched loop exit basic block.
@@ -3204,8 +3202,7 @@ injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L,
auto *InvariantBr =
Builder.CreateCondBr(InjectedCond, InLoopSucc, CheckBlock);
// We don't know anything about the relation between the limits.
- setExplicitlyUnknownBranchWeightsIfProfiled(
- *InvariantBr, *InvariantBr->getParent()->getParent(), DEBUG_TYPE);
+ setExplicitlyUnknownBranchWeightsIfProfiled(*InvariantBr, DEBUG_TYPE);
Builder.SetInsertPoint(CheckBlock);
Builder.CreateCondBr(
diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index 573a781..02b73e8 100644
--- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -1283,6 +1283,12 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
case LibFunc_ilogbl:
case LibFunc_logf:
case LibFunc_logl:
+ case LibFunc_nextafter:
+ case LibFunc_nextafterf:
+ case LibFunc_nextafterl:
+ case LibFunc_nexttoward:
+ case LibFunc_nexttowardf:
+ case LibFunc_nexttowardl:
case LibFunc_pow:
case LibFunc_powf:
case LibFunc_powl:
diff --git a/llvm/lib/Transforms/Utils/DeclareRuntimeLibcalls.cpp b/llvm/lib/Transforms/Utils/DeclareRuntimeLibcalls.cpp
index 6d4436b..dd8706c 100644
--- a/llvm/lib/Transforms/Utils/DeclareRuntimeLibcalls.cpp
+++ b/llvm/lib/Transforms/Utils/DeclareRuntimeLibcalls.cpp
@@ -54,8 +54,8 @@ PreservedAnalyses DeclareRuntimeLibcallsPass::run(Module &M,
const DataLayout &DL = M.getDataLayout();
const Triple &TT = M.getTargetTriple();
- for (RTLIB::LibcallImpl Impl : RTLCI.getLibcallImpls()) {
- if (Impl == RTLIB::Unsupported)
+ for (RTLIB::LibcallImpl Impl : RTLIB::libcall_impls()) {
+ if (!RTLCI.isAvailable(Impl))
continue;
auto [FuncTy, FuncAttrs] = RTLCI.getFunctionTy(Ctx, TT, DL, Impl);
diff --git a/llvm/lib/Transforms/Utils/LoopVersioning.cpp b/llvm/lib/Transforms/Utils/LoopVersioning.cpp
index ec2e6c1..9c8b6ef 100644
--- a/llvm/lib/Transforms/Utils/LoopVersioning.cpp
+++ b/llvm/lib/Transforms/Utils/LoopVersioning.cpp
@@ -23,6 +23,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/IR/ProfDataUtils.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
@@ -109,8 +110,12 @@ void LoopVersioning::versionLoop(
// Insert the conditional branch based on the result of the memchecks.
Instruction *OrigTerm = RuntimeCheckBB->getTerminator();
Builder.SetInsertPoint(OrigTerm);
- Builder.CreateCondBr(RuntimeCheck, NonVersionedLoop->getLoopPreheader(),
- VersionedLoop->getLoopPreheader());
+ auto *BI =
+ Builder.CreateCondBr(RuntimeCheck, NonVersionedLoop->getLoopPreheader(),
+ VersionedLoop->getLoopPreheader());
+ // We don't know what the probability of executing the versioned vs the
+ // unversioned variants is.
+ setExplicitlyUnknownBranchWeightsIfProfiled(*BI, DEBUG_TYPE);
OrigTerm->eraseFromParent();
// The loops merge in the original exit block. This is now dominated by the
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 3a3e3ad..37c048f 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -5214,8 +5214,7 @@ bool SimplifyCFGOpt::simplifyBranchOnICmpChain(BranchInst *BI,
// We don't have any info about this condition.
auto *Br = TrueWhenEqual ? Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB)
: Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);
- setExplicitlyUnknownBranchWeightsIfProfiled(*Br, *NewBB->getParent(),
- DEBUG_TYPE);
+ setExplicitlyUnknownBranchWeightsIfProfiled(*Br, DEBUG_TYPE);
OldTI->eraseFromParent();
@@ -7732,19 +7731,24 @@ static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
// label. The other is those powers of 2 that don't appear in the case
// statement. We don't know the distribution of the values coming in, so
// the safest is to split 50-50 the original probability to `default`.
- uint64_t OrigDenominator = sum_of(map_range(
- Weights, [](const auto &V) { return static_cast<uint64_t>(V); }));
+ uint64_t OrigDenominator =
+ sum_of(map_range(Weights, StaticCastTo<uint64_t>));
SmallVector<uint64_t> NewWeights(2);
NewWeights[1] = Weights[0] / 2;
NewWeights[0] = OrigDenominator - NewWeights[1];
setFittedBranchWeights(*BI, NewWeights, /*IsExpected=*/false);
-
- // For the original switch, we reduce the weight of the default by the
- // amount by which the previous branch contributes to getting to default,
- // and then make sure the remaining weights have the same relative ratio
- // wrt eachother.
+ // The probability of executing the default block stays constant. It was
+ // p_d = Weights[0] / OrigDenominator
+ // we rewrite as W/D
+ // We want to find the probability of the default branch of the switch
+ // statement. Let's call it X. We have W/D = W/2D + X * (1-W/2D)
+ // i.e. the original probability is the probability we go to the default
+ // branch from the BI branch, or we take the default branch on the SI.
+ // Meaning X = W / (2D - W), or (W/2) / (D - W/2)
+ // This matches using W/2 for the default branch probability numerator and
+ // D-W/2 as the denominator.
+ Weights[0] = NewWeights[1];
uint64_t CasesDenominator = OrigDenominator - Weights[0];
- Weights[0] /= 2;
for (auto &W : drop_begin(Weights))
W = NewWeights[0] * static_cast<double>(W) / CasesDenominator;
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 428a8f4..dd26a05 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -304,18 +304,7 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
}
bool IsSingleScalar = vputils::isSingleScalar(Def);
-
VPLane LastLane(IsSingleScalar ? 0 : VF.getFixedValue() - 1);
- // Check if there is a scalar value for the selected lane.
- if (!hasScalarValue(Def, LastLane)) {
- // At the moment, VPWidenIntOrFpInductionRecipes, VPScalarIVStepsRecipes and
- // VPExpandSCEVRecipes can also be a single scalar.
- assert((isa<VPWidenIntOrFpInductionRecipe, VPScalarIVStepsRecipe,
- VPExpandSCEVRecipe>(Def->getDefiningRecipe())) &&
- "unexpected recipe found to be invariant");
- IsSingleScalar = true;
- LastLane = 0;
- }
// We need to construct the vector value for a single-scalar value by
// broadcasting the scalar to all lanes.
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index cfe1f1e..22ea083 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1725,7 +1725,9 @@ public:
#endif
};
-/// A recipe for widening select instructions.
+/// A recipe for widening select instructions. Supports both wide vector and
+/// single-scalar conditions, matching the behavior of LLVM IR's select
+/// instruction.
struct LLVM_ABI_FOR_TEST VPWidenSelectRecipe : public VPRecipeWithIRFlags,
public VPIRMetadata {
VPWidenSelectRecipe(SelectInst &I, ArrayRef<VPValue *> Operands)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 1ee405a..f792d0a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -659,7 +659,9 @@ Value *VPInstruction::generate(VPTransformState &State) {
}
case Instruction::Select: {
bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
- Value *Cond = State.get(getOperand(0), OnlyFirstLaneUsed);
+ Value *Cond =
+ State.get(getOperand(0),
+ OnlyFirstLaneUsed || vputils::isSingleScalar(getOperand(0)));
Value *Op1 = State.get(getOperand(1), OnlyFirstLaneUsed);
Value *Op2 = State.get(getOperand(2), OnlyFirstLaneUsed);
return Builder.CreateSelect(Cond, Op1, Op2, Name);
@@ -1968,16 +1970,13 @@ void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
getOperand(1)->printAsOperand(O, SlotTracker);
O << ", ";
getOperand(2)->printAsOperand(O, SlotTracker);
- O << (isInvariantCond() ? " (condition is loop invariant)" : "");
+ O << (vputils::isSingleScalar(getCond()) ? " (condition is single-scalar)"
+ : "");
}
#endif
void VPWidenSelectRecipe::execute(VPTransformState &State) {
- // The condition can be loop invariant but still defined inside the
- // loop. This means that we can't just use the original 'cond' value.
- // We have to take the 'vectorized' value and pick the first lane.
- // Instcombine will make this a no-op.
- Value *Cond = State.get(getCond(), isInvariantCond());
+ Value *Cond = State.get(getCond(), vputils::isSingleScalar(getCond()));
Value *Op0 = State.get(getOperand(1));
Value *Op1 = State.get(getOperand(2));
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 2588c87..8ad772f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -167,8 +167,7 @@ static bool sinkScalarOperands(VPlan &Plan) {
if (!isa<VPReplicateRecipe, VPScalarIVStepsRecipe>(Candidate))
return;
- if (Candidate->getParent() == SinkTo || Candidate->mayHaveSideEffects() ||
- Candidate->mayReadOrWriteMemory())
+ if (Candidate->getParent() == SinkTo || cannotHoistOrSinkRecipe(*Candidate))
return;
if (auto *RepR = dyn_cast<VPReplicateRecipe>(Candidate))
@@ -1287,6 +1286,15 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) {
return;
}
+ // Look through broadcast of single-scalar when used as select conditions; in
+ // that case the scalar condition can be used directly.
+ if (match(Def,
+ m_Select(m_Broadcast(m_VPValue(C)), m_VPValue(), m_VPValue())) &&
+ vputils::isSingleScalar(C)) {
+ Def->setOperand(0, C);
+ return;
+ }
+
if (auto *Phi = dyn_cast<VPPhi>(Def)) {
if (Phi->getNumOperands() == 1)
Phi->replaceAllUsesWith(Phi->getOperand(0));
@@ -4175,6 +4183,59 @@ static bool isAlreadyNarrow(VPValue *VPV) {
return RepR && RepR->isSingleScalar();
}
+// Convert a wide recipe defining a VPValue \p V feeding an interleave group to
+// a narrow variant.
+static VPValue *
+narrowInterleaveGroupOp(VPValue *V, SmallPtrSetImpl<VPValue *> &NarrowedOps) {
+ auto *R = V->getDefiningRecipe();
+ if (!R || NarrowedOps.contains(V))
+ return V;
+
+ if (isAlreadyNarrow(V))
+ return V;
+
+ if (auto *WideMember0 = dyn_cast<VPWidenRecipe>(R)) {
+ for (unsigned Idx = 0, E = WideMember0->getNumOperands(); Idx != E; ++Idx)
+ WideMember0->setOperand(
+ Idx,
+ narrowInterleaveGroupOp(WideMember0->getOperand(Idx), NarrowedOps));
+ return V;
+ }
+
+ if (auto *LoadGroup = dyn_cast<VPInterleaveRecipe>(R)) {
+ // Narrow interleave group to wide load, as transformed VPlan will only
+ // process one original iteration.
+ auto *LI = cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos());
+ auto *L = new VPWidenLoadRecipe(
+ *LI, LoadGroup->getAddr(), LoadGroup->getMask(), /*Consecutive=*/true,
+ /*Reverse=*/false, LI->getAlign(), {}, LoadGroup->getDebugLoc());
+ L->insertBefore(LoadGroup);
+ NarrowedOps.insert(L);
+ return L;
+ }
+
+ if (auto *RepR = dyn_cast<VPReplicateRecipe>(R)) {
+ assert(RepR->isSingleScalar() &&
+ isa<LoadInst>(RepR->getUnderlyingInstr()) &&
+ "must be a single scalar load");
+ NarrowedOps.insert(RepR);
+ return RepR;
+ }
+
+ auto *WideLoad = cast<VPWidenLoadRecipe>(R);
+ VPValue *PtrOp = WideLoad->getAddr();
+ if (auto *VecPtr = dyn_cast<VPVectorPointerRecipe>(PtrOp))
+ PtrOp = VecPtr->getOperand(0);
+ // Narrow wide load to uniform scalar load, as transformed VPlan will only
+ // process one original iteration.
+ auto *N = new VPReplicateRecipe(&WideLoad->getIngredient(), {PtrOp},
+ /*IsUniform*/ true,
+ /*Mask*/ nullptr, *WideLoad);
+ N->insertBefore(WideLoad);
+ NarrowedOps.insert(N);
+ return N;
+}
+
void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
unsigned VectorRegWidth) {
VPRegionBlock *VectorLoop = Plan.getVectorLoopRegion();
@@ -4276,60 +4337,10 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
// Convert InterleaveGroup \p R to a single VPWidenLoadRecipe.
SmallPtrSet<VPValue *, 4> NarrowedOps;
- auto NarrowOp = [&NarrowedOps](VPValue *V) -> VPValue * {
- auto *R = V->getDefiningRecipe();
- if (!R || NarrowedOps.contains(V))
- return V;
- if (auto *LoadGroup = dyn_cast<VPInterleaveRecipe>(R)) {
- // Narrow interleave group to wide load, as transformed VPlan will only
- // process one original iteration.
- auto *LI =
- cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos());
- auto *L = new VPWidenLoadRecipe(
- *LI, LoadGroup->getAddr(), LoadGroup->getMask(), /*Consecutive=*/true,
- /*Reverse=*/false, LI->getAlign(), {}, LoadGroup->getDebugLoc());
- L->insertBefore(LoadGroup);
- NarrowedOps.insert(L);
- return L;
- }
-
- if (auto *RepR = dyn_cast<VPReplicateRecipe>(R)) {
- assert(RepR->isSingleScalar() &&
- isa<LoadInst>(RepR->getUnderlyingInstr()) &&
- "must be a single scalar load");
- NarrowedOps.insert(RepR);
- return RepR;
- }
- auto *WideLoad = cast<VPWidenLoadRecipe>(R);
-
- VPValue *PtrOp = WideLoad->getAddr();
- if (auto *VecPtr = dyn_cast<VPVectorPointerRecipe>(PtrOp))
- PtrOp = VecPtr->getOperand(0);
- // Narrow wide load to uniform scalar load, as transformed VPlan will only
- // process one original iteration.
- auto *N = new VPReplicateRecipe(&WideLoad->getIngredient(), {PtrOp},
- /*IsUniform*/ true,
- /*Mask*/ nullptr, *WideLoad);
- N->insertBefore(WideLoad);
- NarrowedOps.insert(N);
- return N;
- };
-
// Narrow operation tree rooted at store groups.
for (auto *StoreGroup : StoreGroups) {
- VPValue *Res = nullptr;
- VPValue *Member0 = StoreGroup->getStoredValues()[0];
- if (isAlreadyNarrow(Member0)) {
- Res = Member0;
- } else if (auto *WideMember0 =
- dyn_cast<VPWidenRecipe>(Member0->getDefiningRecipe())) {
- for (unsigned Idx = 0, E = WideMember0->getNumOperands(); Idx != E; ++Idx)
- WideMember0->setOperand(Idx, NarrowOp(WideMember0->getOperand(Idx)));
- Res = WideMember0;
- } else {
- Res = NarrowOp(Member0);
- }
-
+ VPValue *Res =
+ narrowInterleaveGroupOp(StoreGroup->getStoredValues()[0], NarrowedOps);
auto *SI =
cast<StoreInst>(StoreGroup->getInterleaveGroup()->getInsertPos());
auto *S = new VPWidenStoreRecipe(