aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms
diff options
context:
space:
mode:
authorLuke Lau <luke@igalia.com>2025-06-12 11:09:09 +0100
committerGitHub <noreply@github.com>2025-06-12 11:09:09 +0100
commit7ef77eb9984d1fb537a409cf4be89560fbb681fe (patch)
tree40f124347a3b2e11fddf20da07bbd4c543c17012 /llvm/lib/Transforms
parent5987f1ee5cc59a05961156c04010ab0f3c857628 (diff)
downloadllvm-7ef77eb9984d1fb537a409cf4be89560fbb681fe.zip
llvm-7ef77eb9984d1fb537a409cf4be89560fbb681fe.tar.gz
llvm-7ef77eb9984d1fb537a409cf4be89560fbb681fe.tar.bz2
[LV] Support scalable interleave groups for factors 3,5,6 and 7 (#141865)
Currently the loop vectorizer can only vectorize interleave groups for power-of-2 factors at scalable VFs by recursively interleaving [de]interleave2 intrinsics. However, after https://github.com/llvm/llvm-project/pull/124825 and #139893, we now have [de]interleave intrinsics for all factors up to 8, which is enough to support all types of segmented loads and stores on RISC-V. Now that the interleaved access pass has been taught to lower these in #139373 and #141512, this patch teaches the loop vectorizer to emit these intrinsics for factors up to 8, which enables scalable vectorization for non-power-of-2 factors. As far as I'm aware, no in-tree target will vectorize a scalable interleave group above factor 8 because the maximum interleave factor is capped at 4 on AArch64 and 8 on RISC-V, and the `-max-interleave-group-factor` CLI option defaults to 8, so the recursive [de]interleaving code has been removed for now. Factors of 3 with scalable VFs are also turned off in AArch64 since there's no lowering for [de]interleave3 just yet either.
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorize.cpp14
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp63
2 files changed, 21 insertions, 56 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 93ab335..474f856 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3166,10 +3166,9 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(
if (hasIrregularType(ScalarTy, DL))
return false;
- // For scalable vectors, the only interleave factor currently supported
- // must be power of 2 since we require the (de)interleave2 intrinsics
- // instead of shufflevectors.
- if (VF.isScalable() && !isPowerOf2_32(InterleaveFactor))
+ // For scalable vectors, the interleave factors must be <= 8 since we require
+ // the (de)interleaveN intrinsics instead of shufflevectors.
+ if (VF.isScalable() && InterleaveFactor > 8)
return false;
// If the group involves a non-integral pointer, we may not be able to
@@ -8718,10 +8717,9 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
bool Result = (VF.isVector() && // Query is illegal for VF == 1
CM.getWideningDecision(IG->getInsertPos(), VF) ==
LoopVectorizationCostModel::CM_Interleave);
- // For scalable vectors, the only interleave factor currently supported
- // must be power of 2 since we require the (de)interleave2 intrinsics
- // instead of shufflevectors.
- assert((!Result || !VF.isScalable() || isPowerOf2_32(IG->getFactor())) &&
+ // For scalable vectors, the interleave factors must be <= 8 since we
+ // require the (de)interleaveN intrinsics instead of shufflevectors.
+ assert((!Result || !VF.isScalable() || IG->getFactor() <= 8) &&
"Unsupported interleave factor for scalable vectors");
return Result;
};
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index f5a2533..8863a3f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -3296,21 +3296,13 @@ static Value *interleaveVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vals,
// Scalable vectors cannot use arbitrary shufflevectors (only splats), so
// must use intrinsics to interleave.
if (VecTy->isScalableTy()) {
- assert(isPowerOf2_32(Factor) && "Unsupported interleave factor for "
- "scalable vectors, must be power of 2");
- SmallVector<Value *> InterleavingValues(Vals);
- // When interleaving, the number of values will be shrunk until we have the
- // single final interleaved value.
- auto *InterleaveTy = cast<VectorType>(InterleavingValues[0]->getType());
- for (unsigned Midpoint = Factor / 2; Midpoint > 0; Midpoint /= 2) {
- InterleaveTy = VectorType::getDoubleElementsVectorType(InterleaveTy);
- for (unsigned I = 0; I < Midpoint; ++I)
- InterleavingValues[I] = Builder.CreateIntrinsic(
- InterleaveTy, Intrinsic::vector_interleave2,
- {InterleavingValues[I], InterleavingValues[Midpoint + I]},
- /*FMFSource=*/nullptr, Name);
- }
- return InterleavingValues[0];
+ assert(Factor <= 8 && "Unsupported interleave factor for scalable vectors");
+ VectorType *InterleaveTy =
+ VectorType::get(VecTy->getElementType(),
+ VecTy->getElementCount().multiplyCoefficientBy(Factor));
+ return Builder.CreateIntrinsic(InterleaveTy,
+ getInterleaveIntrinsicID(Factor), Vals,
+ /*FMFSource=*/nullptr, Name);
}
// Fixed length. Start by concatenating all vectors into a wide vector.
@@ -3396,7 +3388,7 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
&InterleaveFactor](Value *MaskForGaps) -> Value * {
if (State.VF.isScalable()) {
assert(!MaskForGaps && "Interleaved groups with gaps are not supported.");
- assert(isPowerOf2_32(InterleaveFactor) &&
+ assert(InterleaveFactor <= 8 &&
"Unsupported deinterleave factor for scalable vectors");
auto *ResBlockInMask = State.get(BlockInMask);
SmallVector<Value *> Ops(InterleaveFactor, ResBlockInMask);
@@ -3440,43 +3432,18 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
ArrayRef<VPValue *> VPDefs = definedValues();
const DataLayout &DL = State.CFG.PrevBB->getDataLayout();
if (VecTy->isScalableTy()) {
- assert(isPowerOf2_32(InterleaveFactor) &&
- "Unsupported deinterleave factor for scalable vectors");
-
// Scalable vectors cannot use arbitrary shufflevectors (only splats),
// so must use intrinsics to deinterleave.
- SmallVector<Value *> DeinterleavedValues(InterleaveFactor);
- DeinterleavedValues[0] = NewLoad;
- // For the case of InterleaveFactor > 2, we will have to do recursive
- // deinterleaving, because the current available deinterleave intrinsic
- // supports only Factor of 2, otherwise it will bailout after first
- // iteration.
- // When deinterleaving, the number of values will double until we
- // have "InterleaveFactor".
- for (unsigned NumVectors = 1; NumVectors < InterleaveFactor;
- NumVectors *= 2) {
- // Deinterleave the elements within the vector
- SmallVector<Value *> TempDeinterleavedValues(NumVectors);
- for (unsigned I = 0; I < NumVectors; ++I) {
- auto *DiTy = DeinterleavedValues[I]->getType();
- TempDeinterleavedValues[I] = State.Builder.CreateIntrinsic(
- Intrinsic::vector_deinterleave2, DiTy, DeinterleavedValues[I],
- /*FMFSource=*/nullptr, "strided.vec");
- }
- // Extract the deinterleaved values:
- for (unsigned I = 0; I < 2; ++I)
- for (unsigned J = 0; J < NumVectors; ++J)
- DeinterleavedValues[NumVectors * I + J] =
- State.Builder.CreateExtractValue(TempDeinterleavedValues[J], I);
- }
+ assert(InterleaveFactor <= 8 &&
+ "Unsupported deinterleave factor for scalable vectors");
+ Value *Deinterleave = State.Builder.CreateIntrinsic(
+ getDeinterleaveIntrinsicID(InterleaveFactor), NewLoad->getType(),
+ NewLoad,
+ /*FMFSource=*/nullptr, "strided.vec");
-#ifndef NDEBUG
- for (Value *Val : DeinterleavedValues)
- assert(Val && "NULL Deinterleaved Value");
-#endif
for (unsigned I = 0, J = 0; I < InterleaveFactor; ++I) {
Instruction *Member = Group->getMember(I);
- Value *StridedVec = DeinterleavedValues[I];
+ Value *StridedVec = State.Builder.CreateExtractValue(Deinterleave, I);
if (!Member) {
// This value is not needed as it's not used
cast<Instruction>(StridedVec)->eraseFromParent();