-rw-r--r--  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp                    | 15
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlan.cpp                            | 35
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlan.h                              | 26
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp                     | 36
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanValue.h                         | 12
-rw-r--r--  llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll  |  4
-rw-r--r--  llvm/test/Transforms/LoopVectorize/X86/small-size.ll               |  2
7 files changed, 79 insertions(+), 51 deletions(-)
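
This patch teaches VPTransformState to keep a single scalar value per unroll part alongside the usual per-part vector values: get() and set() gain an IsScalar flag that routes through lane (Part, 0) of Data.PerPartScalars, and the plan-wide quantities BackedgeTakenCount, VectorTripCount and VFxUF become live-ins whose IR value is attached via setUnderlyingValue() instead of being splatted for every part up front. The toy model below (plain std:: containers and strings standing in for the real VPValue/Value types; an illustration of the storage convention, not LLVM code) shows how the flag selects between the two tables:

    #include <cassert>
    #include <map>
    #include <string>
    #include <vector>

    // Toy stand-ins for Data.PerPartOutput (one vector value per part) and
    // Data.PerPartScalars (scalar values per part and lane).
    struct ToyState {
      unsigned UF = 2; // unroll factor: number of parts
      std::map<std::string, std::vector<std::string>> PerPartOutput;
      std::map<std::string, std::vector<std::vector<std::string>>> PerPartScalars;

      void set(const std::string &Def, const std::string &V, unsigned Part,
               bool IsScalar = false) {
        if (IsScalar) { // scalars are recorded at lane (Part, 0)
          auto &Lanes = PerPartScalars[Def];
          Lanes.resize(UF);
          Lanes[Part].assign(1, V);
          return;
        }
        auto &Parts = PerPartOutput[Def];
        Parts.resize(UF);
        Parts[Part] = V;
      }

      std::string get(const std::string &Def, unsigned Part,
                      bool NeedsScalar = false) {
        if (NeedsScalar) {
          // Mirrors the new assert: exactly one scalar must exist for the part.
          auto It = PerPartScalars.find(Def);
          assert(It != PerPartScalars.end() && It->second[Part].size() == 1 &&
                 "single scalar per part expected");
          return It->second[Part][0];
        }
        return PerPartOutput[Def][Part];
      }
    };

    int main() {
      ToyState State;
      State.set("canonical.iv", "phi.i64", /*Part=*/0, /*IsScalar=*/true);
      State.set("wide.load", "load.v4i32", /*Part=*/0);
      assert(State.get("canonical.iv", 0, /*NeedsScalar=*/true) == "phi.i64");
      assert(State.get("wide.load", 0) == "load.v4i32");
      return 0;
    }

The real VPTransformState keeps the same two tables; the flag only selects which one a producer writes and a consumer reads, which is why a mismatched producer/consumer pair trips the new assert in get().
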
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 373ea75..e5deac7 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9127,7 +9127,7 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
"Unexpected type.");
auto *IVR = getParent()->getPlan()->getCanonicalIV();
- PHINode *CanonicalIV = cast<PHINode>(State.get(IVR, 0));
+ PHINode *CanonicalIV = cast<PHINode>(State.get(IVR, 0, /*IsScalar*/ true));
if (onlyScalarsGenerated(State.VF.isScalable())) {
// This is the normalized GEP that starts counting at zero.
@@ -9243,7 +9243,7 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
void VPReductionRecipe::execute(VPTransformState &State) {
assert(!State.Instance && "Reduction being replicated.");
- Value *PrevInChain = State.get(getChainOp(), 0);
+ Value *PrevInChain = State.get(getChainOp(), 0, /*IsScalar*/ true);
RecurKind Kind = RdxDesc.getRecurrenceKind();
bool IsOrdered = State.ILV->useOrderedReductions(RdxDesc);
// Propagate the fast-math flags carried by the underlying instruction.
@@ -9252,8 +9252,7 @@ void VPReductionRecipe::execute(VPTransformState &State) {
for (unsigned Part = 0; Part < State.UF; ++Part) {
Value *NewVecOp = State.get(getVecOp(), Part);
if (VPValue *Cond = getCondOp()) {
- Value *NewCond = State.VF.isVector() ? State.get(Cond, Part)
- : State.get(Cond, {Part, 0});
+ Value *NewCond = State.get(Cond, Part, State.VF.isScalar());
VectorType *VecTy = dyn_cast<VectorType>(NewVecOp->getType());
Type *ElementTy = VecTy ? VecTy->getElementType() : NewVecOp->getType();
Value *Iden = RdxDesc.getRecurrenceIdentity(Kind, ElementTy,
@@ -9278,7 +9277,7 @@ void VPReductionRecipe::execute(VPTransformState &State) {
NewVecOp);
PrevInChain = NewRed;
} else {
- PrevInChain = State.get(getChainOp(), Part);
+ PrevInChain = State.get(getChainOp(), Part, /*IsScalar*/ true);
NewRed = createTargetReduction(State.Builder, RdxDesc, NewVecOp);
}
if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) {
@@ -9289,7 +9288,7 @@ void VPReductionRecipe::execute(VPTransformState &State) {
else
NextInChain = State.Builder.CreateBinOp(
(Instruction::BinaryOps)RdxDesc.getOpcode(Kind), NewRed, PrevInChain);
- State.set(this, NextInChain, Part);
+ State.set(this, NextInChain, Part, /*IsScalar*/ true);
}
}
@@ -9404,7 +9403,7 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
// We don't want to update the value in the map as it might be used in
// another expression. So don't call resetVectorValue(StoredVal).
}
- auto *VecPtr = State.get(getAddr(), Part);
+ auto *VecPtr = State.get(getAddr(), Part, /*IsScalar*/ true);
if (isMaskRequired)
NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment,
BlockInMaskParts[Part]);
@@ -9428,7 +9427,7 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
nullptr, "wide.masked.gather");
State.addMetadata(NewLI, LI);
} else {
- auto *VecPtr = State.get(getAddr(), Part);
+ auto *VecPtr = State.get(getAddr(), Part, /*IsScalar*/ true);
if (isMaskRequired)
NewLI = Builder.CreateMaskedLoad(
DataTy, VecPtr, Alignment, BlockInMaskParts[Part],
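
A note on the /*IsScalar*/ true at the load/store addresses above: it pairs with the VPVectorPointerRecipe hunk in VPlanRecipes.cpp further down. The per-part pointer is uniform across lanes, so its producer records it as the part's single scalar, and every consumer has to request the same form or the new assert in VPTransformState::get() fires. Condensed from the hunks in this patch (not a complete recipe implementation):

    // Producer (VPVectorPointerRecipe::execute): one pointer per part,
    // identical for all lanes, stored as the part's scalar value.
    State.set(this, PartPtr, Part, /*IsScalar*/ true);
    // Consumer (VPWidenMemoryInstructionRecipe::execute): read it back in
    // scalar form on the consecutive load and store paths.
    auto *VecPtr = State.get(getAddr(), Part, /*IsScalar*/ true);
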
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 4ffbed4..4aeab6f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -242,7 +242,16 @@ Value *VPTransformState::get(VPValue *Def, const VPIteration &Instance) {
return Extract;
}
-Value *VPTransformState::get(VPValue *Def, unsigned Part) {
+Value *VPTransformState::get(VPValue *Def, unsigned Part, bool NeedsScalar) {
+ if (NeedsScalar) {
+ assert((VF.isScalar() || Def->isLiveIn() ||
+ (hasScalarValue(Def, VPIteration(Part, 0)) &&
+ Data.PerPartScalars[Def][Part].size() == 1)) &&
+ "Trying to access a single scalar per part but has multiple scalars "
+ "per part.");
+ return get(Def, VPIteration(Part, 0));
+ }
+
// If Values have been set for this Def return the one relevant for \p Part.
if (hasVectorValue(Def, Part))
return Data.PerPartOutput[Def][Part];
@@ -789,21 +798,15 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
auto *TCMO = Builder.CreateSub(TripCountV,
ConstantInt::get(TripCountV->getType(), 1),
"trip.count.minus.1");
- auto VF = State.VF;
- Value *VTCMO =
- VF.isScalar() ? TCMO : Builder.CreateVectorSplat(VF, TCMO, "broadcast");
- for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
- State.set(BackedgeTakenCount, VTCMO, Part);
+ BackedgeTakenCount->setUnderlyingValue(TCMO);
}
- for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
- State.set(&VectorTripCount, VectorTripCountV, Part);
+ VectorTripCount.setUnderlyingValue(VectorTripCountV);
IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
// FIXME: Model VF * UF computation completely in VPlan.
- State.set(&VFxUF,
- createStepForVF(Builder, TripCountV->getType(), State.VF, State.UF),
- 0);
+ VFxUF.setUnderlyingValue(
+ createStepForVF(Builder, TripCountV->getType(), State.VF, State.UF));
// When vectorizing the epilogue loop, the canonical induction start value
// needs to be changed from zero to the value after the main vector loop.
@@ -884,12 +887,16 @@ void VPlan::execute(VPTransformState *State) {
isa<VPFirstOrderRecurrencePHIRecipe>(PhiR) ||
(isa<VPReductionPHIRecipe>(PhiR) &&
cast<VPReductionPHIRecipe>(PhiR)->isOrdered());
+ bool NeedsScalar = isa<VPCanonicalIVPHIRecipe>(PhiR) ||
+ (isa<VPReductionPHIRecipe>(PhiR) &&
+ cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
unsigned LastPartForNewPhi = SinglePartNeeded ? 1 : State->UF;
for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
- Value *Phi = State->get(PhiR, Part);
- Value *Val = State->get(PhiR->getBackedgeValue(),
- SinglePartNeeded ? State->UF - 1 : Part);
+ Value *Phi = State->get(PhiR, Part, NeedsScalar);
+ Value *Val =
+ State->get(PhiR->getBackedgeValue(),
+ SinglePartNeeded ? State->UF - 1 : Part, NeedsScalar);
cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
}
}
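
A note on the prepareToExecute() changes above: the backedge-taken count, the vector trip count and VF * UF are no longer computed, splatted and registered for every unroll part eagerly. Each is now a live-in that carries its IR value via setUnderlyingValue(), and any broadcast is emitted on demand at the first vector use. The tail-folding-styles.ll hunk below shows the visible effect: the BROADCAST_SPLATINSERT/BROADCAST_SPLAT pair for trip.count.minus.1 is created when first needed, so it lands after the vscale computation instead of before it.
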
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index aaddaaf..47cebec 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -259,9 +259,10 @@ struct VPTransformState {
DenseMap<VPValue *, ScalarsPerPartValuesTy> PerPartScalars;
} Data;
- /// Get the generated Value for the given VPValue \p Def and the given \p Part.
- /// \see set.
- Value *get(VPValue *Def, unsigned Part);
+ /// Get the generated vector Value for a given VPValue \p Def and a given \p
+ /// Part if \p IsScalar is false, otherwise return the generated scalar
+ /// for \p Part. \see set.
+ Value *get(VPValue *Def, unsigned Part, bool IsScalar = false);
/// Get the generated Value for a given VPValue and given Part and Lane.
Value *get(VPValue *Def, const VPIteration &Instance);
@@ -282,14 +283,22 @@ struct VPTransformState {
I->second[Instance.Part][CacheIdx];
}
- /// Set the generated Value for a given VPValue and a given Part.
- void set(VPValue *Def, Value *V, unsigned Part) {
+ /// Set the generated vector Value for a given VPValue and a given Part, if \p
+ /// IsScalar is false. If \p IsScalar is true, set the scalar in (Part, 0).
+ void set(VPValue *Def, Value *V, unsigned Part, bool IsScalar = false) {
+ if (IsScalar) {
+ set(Def, V, VPIteration(Part, 0));
+ return;
+ }
+ assert((VF.isScalar() || V->getType()->isVectorTy()) &&
+ "scalar values must be stored as (Part, 0)");
if (!Data.PerPartOutput.count(Def)) {
DataState::PerPartValuesTy Entry(UF);
Data.PerPartOutput[Def] = Entry;
}
Data.PerPartOutput[Def][Part] = V;
}
+
/// Reset an existing vector value for \p Def and a given \p Part.
void reset(VPValue *Def, Value *V, unsigned Part) {
auto Iter = Data.PerPartOutput.find(Def);
@@ -1376,6 +1385,13 @@ public:
/// Returns the result type of the cast.
Type *getResultType() const { return ResultTy; }
+
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ // At the moment, only uniform codegen is implemented.
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return true;
+ }
};
/// A recipe for widening Call instructions.
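
The onlyFirstLaneUsed() override added above is the recipe-side half of the contract: a recipe returns true for an operand to advertise that it only ever reads lane 0, which is what allows that operand's producer to store a single scalar per part. The aggregate query vputils::onlyFirstLaneUsed(this), used in the VPlanRecipes.cpp hunk below, is not defined in this patch; a plausible sketch, assuming it simply polls all users of the value:

    bool vputils::onlyFirstLaneUsed(VPValue *Def) {
      // True only if every user promises to read no lane beyond lane 0.
      return all_of(Def->users(),
                    [Def](VPUser *U) { return U->onlyFirstLaneUsed(Def); });
    }
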
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 2d2f6ac..27b72575 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -279,11 +279,12 @@ Value *VPInstruction::generateInstruction(VPTransformState &State,
Builder.SetCurrentDebugLocation(getDebugLoc());
if (Instruction::isBinaryOp(getOpcode())) {
+ bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
if (Part != 0 && vputils::onlyFirstPartUsed(this))
- return State.get(this, 0);
+ return State.get(this, 0, OnlyFirstLaneUsed);
- Value *A = State.get(getOperand(0), Part);
- Value *B = State.get(getOperand(1), Part);
+ Value *A = State.get(getOperand(0), Part, OnlyFirstLaneUsed);
+ Value *B = State.get(getOperand(1), Part, OnlyFirstLaneUsed);
auto *Res =
Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
if (auto *I = dyn_cast<Instruction>(Res))
@@ -385,8 +386,8 @@ Value *VPInstruction::generateInstruction(VPTransformState &State,
if (Part != 0)
return nullptr;
// First create the compare.
- Value *IV = State.get(getOperand(0), Part);
- Value *TC = State.get(getOperand(1), Part);
+ Value *IV = State.get(getOperand(0), Part, /*IsScalar*/ true);
+ Value *TC = State.get(getOperand(1), Part, /*IsScalar*/ true);
Value *Cond = Builder.CreateICmpEQ(IV, TC);
// Now create the branch.
@@ -407,7 +408,7 @@ Value *VPInstruction::generateInstruction(VPTransformState &State,
}
case VPInstruction::ComputeReductionResult: {
if (Part != 0)
- return State.get(this, 0);
+ return State.get(this, 0, /*IsScalar*/ true);
// FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
// and will be removed by breaking up the recipe further.
@@ -424,7 +425,7 @@ Value *VPInstruction::generateInstruction(VPTransformState &State,
Type *PhiTy = OrigPhi->getType();
VectorParts RdxParts(State.UF);
for (unsigned Part = 0; Part < State.UF; ++Part)
- RdxParts[Part] = State.get(LoopExitingDef, Part);
+ RdxParts[Part] = State.get(LoopExitingDef, Part, PhiR->isInLoop());
// If the vector reduction can be performed in a smaller type, we truncate
// then extend the loop exit value to enable InstCombine to evaluate the
@@ -512,9 +513,15 @@ void VPInstruction::execute(VPTransformState &State) {
if (!hasResult())
continue;
assert(GeneratedValue && "generateInstruction must produce a value");
- State.set(this, GeneratedValue, Part);
+
+ bool IsVector = GeneratedValue->getType()->isVectorTy();
+ State.set(this, GeneratedValue, Part, !IsVector);
+ assert((IsVector || getOpcode() == VPInstruction::ComputeReductionResult ||
+ State.VF.isScalar() || vputils::onlyFirstLaneUsed(this)) &&
+ "scalar value but not only first lane used");
}
}
+
bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
if (Instruction::isBinaryOp(getOpcode()))
@@ -530,8 +537,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
case VPInstruction::CalculateTripCountMinusVF:
case VPInstruction::CanonicalIVIncrementForPart:
case VPInstruction::BranchOnCount:
- // TODO: Cover additional operands.
- return getOperand(0) == Op;
+ return true;
};
llvm_unreachable("switch should return");
}
@@ -1344,7 +1350,7 @@ void VPVectorPointerRecipe ::execute(VPTransformState &State) {
PartPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds);
}
- State.set(this, PartPtr, Part);
+ State.set(this, PartPtr, Part, /*IsScalar*/ true);
}
}
@@ -1640,7 +1646,7 @@ void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) {
EntryPart->addIncoming(Start, VectorPH);
EntryPart->setDebugLoc(getDebugLoc());
for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
- State.set(this, EntryPart, Part);
+ State.set(this, EntryPart, Part, /*IsScalar*/ true);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -1711,7 +1717,7 @@ void VPExpandSCEVRecipe::print(raw_ostream &O, const Twine &Indent,
#endif
void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
- Value *CanonicalIV = State.get(getOperand(0), 0);
+ Value *CanonicalIV = State.get(getOperand(0), 0, /*IsScalar*/ true);
Type *STy = CanonicalIV->getType();
IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
ElementCount VF = State.VF;
@@ -1801,7 +1807,7 @@ void VPReductionPHIRecipe::execute(VPTransformState &State) {
for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
Instruction *EntryPart = PHINode::Create(VecTy, 2, "vec.phi");
EntryPart->insertBefore(HeaderBB->getFirstInsertionPt());
- State.set(this, EntryPart, Part);
+ State.set(this, EntryPart, Part, IsInLoop);
}
BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
@@ -1833,7 +1839,7 @@ void VPReductionPHIRecipe::execute(VPTransformState &State) {
}
for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
- Value *EntryPart = State.get(this, Part);
+ Value *EntryPart = State.get(this, Part, IsInLoop);
// Make sure to add the reduction start value only to the
// first unroll part.
Value *StartVal = (Part == 0) ? StartV : Iden;
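
A note on the IsInLoop plumbing in VPReductionPHIRecipe::execute above: an in-loop reduction keeps its running value in a scalar phi, so the entry value is stored and re-read as the part's single scalar. This matches the /*IsScalar*/ true reads of the chain operand in VPReductionRecipe::execute earlier in the patch; producer and consumer must agree on the form. Condensed from the hunks:

    // Producer: for in-loop reductions (IsInLoop == true) the phi is scalar.
    State.set(this, EntryPart, Part, IsInLoop);
    // Consumer: the reduction chain is requested in the same scalar form.
    Value *PrevInChain = State.get(getChainOp(), 0, /*IsScalar*/ true);
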
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index b114716..1d2c17e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -73,12 +73,6 @@ protected:
// for multiple underlying IRs (Polly?) by providing a new VPlan front-end,
// back-end and analysis information for the new IR.
- // Set \p Val as the underlying Value of this VPValue.
- void setUnderlyingValue(Value *Val) {
- assert(!UnderlyingVal && "Underlying Value is already set.");
- UnderlyingVal = Val;
- }
-
public:
/// Return the underlying Value attached to this VPValue.
Value *getUnderlyingValue() { return UnderlyingVal; }
@@ -192,6 +186,12 @@ public:
/// is a live-in value.
/// TODO: Also handle recipes defined in pre-header blocks.
bool isDefinedOutsideVectorRegions() const { return !hasDefiningRecipe(); }
+
+ // Set \p Val as the underlying Value of this VPValue.
+ void setUnderlyingValue(Value *Val) {
+ assert(!UnderlyingVal && "Underlying Value is already set.");
+ UnderlyingVal = Val;
+ }
};
typedef DenseMap<Value *, VPValue *> Value2VPValueTy;
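
The move of setUnderlyingValue() above is from the protected section to the public interface, with the body unchanged: VPlan::prepareToExecute now needs to attach computed IR values to live-in VPValues directly, as in the VPlan.cpp hunk:

    VFxUF.setUnderlyingValue(
        createStepForVF(Builder, TripCountV->getType(), State.VF, State.UF));

The assert still guarantees an underlying value is only attached once.
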
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll b/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll
index 94f24fe..13fc0ea 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll
@@ -117,10 +117,10 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features
; DATA_NO_LANEMASK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
; DATA_NO_LANEMASK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; DATA_NO_LANEMASK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[UMAX]], 1
-; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
-; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; DATA_NO_LANEMASK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
; DATA_NO_LANEMASK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4
+; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
+; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL:%.*]], i64 0
; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT4]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; DATA_NO_LANEMASK-NEXT: br label [[VECTOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll
index be83329..51d2648 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll
@@ -142,7 +142,7 @@ define void @example2(i32 %n, i32 %x) optsize {
; CHECK-NEXT: [[BROADCAST_SPLATINSERT17:%.*]] = insertelement <4 x i64> poison, i64 [[TRIP_COUNT_MINUS_116]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT18:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT17]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY19:%.*]]
-; CHECK: vector.body19:
+; CHECK: vector.body17:
; CHECK-NEXT: [[INDEX20:%.*]] = phi i64 [ 0, [[VECTOR_PH9]] ], [ [[INDEX_NEXT31:%.*]], [[PRED_STORE_CONTINUE30:%.*]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[I_0_LCSSA]], [[INDEX20]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT21:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX20]], i64 0
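
Both test updates are mechanical fallout from the lazier materialization: in tail-folding-styles.ll the splat of trip.count.minus.1 is now emitted at its first use, after the vscale computation, and in small-size.ll the vector.body block suffix drops from 19 to 17, presumably because the preheader creates fewer named values before the block name is uniqued.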