diff options
author | Luke Lau <luke@igalia.com> | 2025-06-12 11:09:09 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-06-12 11:09:09 +0100 |
commit | 7ef77eb9984d1fb537a409cf4be89560fbb681fe (patch) | |
tree | 40f124347a3b2e11fddf20da07bbd4c543c17012 /llvm/lib/Transforms | |
parent | 5987f1ee5cc59a05961156c04010ab0f3c857628 (diff) | |
download | llvm-7ef77eb9984d1fb537a409cf4be89560fbb681fe.zip llvm-7ef77eb9984d1fb537a409cf4be89560fbb681fe.tar.gz llvm-7ef77eb9984d1fb537a409cf4be89560fbb681fe.tar.bz2 |
[LV] Support scalable interleave groups for factors 3,5,6 and 7 (#141865)
Currently the loop vectorizer can only vectorize interleave groups for
power-of-2 factors at scalable VFs by recursively interleaving
[de]interleave2 intrinsics.
However, after https://github.com/llvm/llvm-project/pull/124825 and
#139893, we now have [de]interleave intrinsics for all factors up to 8,
which is enough to support all types of segmented loads and stores on
RISC-V.
Now that the interleaved access pass has been taught to lower these in
#139373 and #141512, this patch teaches the loop vectorizer to emit
these intrinsics for factors up to 8, which enables scalable
vectorization for non-power-of-2 factors.
As far as I'm aware, no in-tree target will vectorize a scalable
interleave group above factor 8 because the maximum interleave factor is
capped at 4 on AArch64 and 8 on RISC-V, and the
`-max-interleave-group-factor` CLI option defaults to 8, so the
recursive [de]interleaving code has been removed for now.
Factors of 3 with scalable VFs are also turned off in AArch64 since
there's no lowering for [de]interleave3 just yet either.
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 14 | ||||
-rw-r--r-- | llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 63 |
2 files changed, 21 insertions, 56 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 93ab335..474f856 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3166,10 +3166,9 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened( if (hasIrregularType(ScalarTy, DL)) return false; - // For scalable vectors, the only interleave factor currently supported - // must be power of 2 since we require the (de)interleave2 intrinsics - // instead of shufflevectors. - if (VF.isScalable() && !isPowerOf2_32(InterleaveFactor)) + // For scalable vectors, the interleave factors must be <= 8 since we require + // the (de)interleaveN intrinsics instead of shufflevectors. + if (VF.isScalable() && InterleaveFactor > 8) return false; // If the group involves a non-integral pointer, we may not be able to @@ -8718,10 +8717,9 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes( bool Result = (VF.isVector() && // Query is illegal for VF == 1 CM.getWideningDecision(IG->getInsertPos(), VF) == LoopVectorizationCostModel::CM_Interleave); - // For scalable vectors, the only interleave factor currently supported - // must be power of 2 since we require the (de)interleave2 intrinsics - // instead of shufflevectors. - assert((!Result || !VF.isScalable() || isPowerOf2_32(IG->getFactor())) && + // For scalable vectors, the interleave factors must be <= 8 since we + // require the (de)interleaveN intrinsics instead of shufflevectors. 
+ assert((!Result || !VF.isScalable() || IG->getFactor() <= 8) && "Unsupported interleave factor for scalable vectors"); return Result; }; diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index f5a2533..8863a3f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -3296,21 +3296,13 @@ static Value *interleaveVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vals, // Scalable vectors cannot use arbitrary shufflevectors (only splats), so // must use intrinsics to interleave. if (VecTy->isScalableTy()) { - assert(isPowerOf2_32(Factor) && "Unsupported interleave factor for " - "scalable vectors, must be power of 2"); - SmallVector<Value *> InterleavingValues(Vals); - // When interleaving, the number of values will be shrunk until we have the - // single final interleaved value. - auto *InterleaveTy = cast<VectorType>(InterleavingValues[0]->getType()); - for (unsigned Midpoint = Factor / 2; Midpoint > 0; Midpoint /= 2) { - InterleaveTy = VectorType::getDoubleElementsVectorType(InterleaveTy); - for (unsigned I = 0; I < Midpoint; ++I) - InterleavingValues[I] = Builder.CreateIntrinsic( - InterleaveTy, Intrinsic::vector_interleave2, - {InterleavingValues[I], InterleavingValues[Midpoint + I]}, - /*FMFSource=*/nullptr, Name); - } - return InterleavingValues[0]; + assert(Factor <= 8 && "Unsupported interleave factor for scalable vectors"); + VectorType *InterleaveTy = + VectorType::get(VecTy->getElementType(), + VecTy->getElementCount().multiplyCoefficientBy(Factor)); + return Builder.CreateIntrinsic(InterleaveTy, + getInterleaveIntrinsicID(Factor), Vals, + /*FMFSource=*/nullptr, Name); } // Fixed length. Start by concatenating all vectors into a wide vector. 
@@ -3396,7 +3388,7 @@ void VPInterleaveRecipe::execute(VPTransformState &State) { &InterleaveFactor](Value *MaskForGaps) -> Value * { if (State.VF.isScalable()) { assert(!MaskForGaps && "Interleaved groups with gaps are not supported."); - assert(isPowerOf2_32(InterleaveFactor) && + assert(InterleaveFactor <= 8 && "Unsupported deinterleave factor for scalable vectors"); auto *ResBlockInMask = State.get(BlockInMask); SmallVector<Value *> Ops(InterleaveFactor, ResBlockInMask); @@ -3440,43 +3432,18 @@ void VPInterleaveRecipe::execute(VPTransformState &State) { ArrayRef<VPValue *> VPDefs = definedValues(); const DataLayout &DL = State.CFG.PrevBB->getDataLayout(); if (VecTy->isScalableTy()) { - assert(isPowerOf2_32(InterleaveFactor) && - "Unsupported deinterleave factor for scalable vectors"); - // Scalable vectors cannot use arbitrary shufflevectors (only splats), // so must use intrinsics to deinterleave. - SmallVector<Value *> DeinterleavedValues(InterleaveFactor); - DeinterleavedValues[0] = NewLoad; - // For the case of InterleaveFactor > 2, we will have to do recursive - // deinterleaving, because the current available deinterleave intrinsic - // supports only Factor of 2, otherwise it will bailout after first - // iteration. - // When deinterleaving, the number of values will double until we - // have "InterleaveFactor". 
- for (unsigned NumVectors = 1; NumVectors < InterleaveFactor; - NumVectors *= 2) { - // Deinterleave the elements within the vector - SmallVector<Value *> TempDeinterleavedValues(NumVectors); - for (unsigned I = 0; I < NumVectors; ++I) { - auto *DiTy = DeinterleavedValues[I]->getType(); - TempDeinterleavedValues[I] = State.Builder.CreateIntrinsic( - Intrinsic::vector_deinterleave2, DiTy, DeinterleavedValues[I], - /*FMFSource=*/nullptr, "strided.vec"); - } - // Extract the deinterleaved values: - for (unsigned I = 0; I < 2; ++I) - for (unsigned J = 0; J < NumVectors; ++J) - DeinterleavedValues[NumVectors * I + J] = - State.Builder.CreateExtractValue(TempDeinterleavedValues[J], I); - } + assert(InterleaveFactor <= 8 && + "Unsupported deinterleave factor for scalable vectors"); + Value *Deinterleave = State.Builder.CreateIntrinsic( + getDeinterleaveIntrinsicID(InterleaveFactor), NewLoad->getType(), + NewLoad, + /*FMFSource=*/nullptr, "strided.vec"); -#ifndef NDEBUG - for (Value *Val : DeinterleavedValues) - assert(Val && "NULL Deinterleaved Value"); -#endif for (unsigned I = 0, J = 0; I < InterleaveFactor; ++I) { Instruction *Member = Group->getMember(I); - Value *StridedVec = DeinterleavedValues[I]; + Value *StridedVec = State.Builder.CreateExtractValue(Deinterleave, I); if (!Member) { // This value is not needed as it's not used cast<Instruction>(StridedVec)->eraseFromParent(); |