diff options
author | Luke Lau <luke@igalia.com> | 2025-05-22 04:32:47 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-05-22 04:32:47 +0100 |
commit | 09c3d1432b15203762b6ccd8f2faec17b5c61c8c (patch) | |
tree | 6c3d72687f5a58f671e078ef056d4e7db93f14e9 /llvm/lib/CodeGen/InterleavedAccessPass.cpp | |
parent | 0641ca1cd21d86f081157a33cd5e1aa2b1bd2d04 (diff) | |
download | llvm-09c3d1432b15203762b6ccd8f2faec17b5c61c8c.zip llvm-09c3d1432b15203762b6ccd8f2faec17b5c61c8c.tar.gz llvm-09c3d1432b15203762b6ccd8f2faec17b5c61c8c.tar.bz2 |
[IA] Add support for [de]interleave{3,5,7} (#139373)
This adds support for lowering deinterleave and interleave intrinsics
for factors 3 5 and 7 into target specific memory intrinsics.
Notably this doesn't add support for handling higher factors constructed
from interleaving interleave intrinsics, e.g. factor 6 from interleave3
+ interleave2.
I initially tried this but it became very complex very quickly. For
example, because there's now multiple factors involved
interleaveLeafValues is no longer symmetric between interleaving and
deinterleaving. There's then also two ways of representing a factor 6
deinterleave: It can both be done as either 1 deinterleave3 and 3
deinterleave2s OR 1 deinterleave2 and 3 deinterleave3s.
I'm not sure the complexity of supporting arbitrary factors is warranted
given how we only need to support a small number of factors currently:
SVE only needs factors 2,3,4 whilst RVV only needs 2,3,4,5,6,7,8.
My preference would be to just add a interleave6 and deinterleave6
intrinsic to avoid all this ambiguity, but I'll defer this discussion to
a later patch.
Diffstat (limited to 'llvm/lib/CodeGen/InterleavedAccessPass.cpp')
-rw-r--r-- | llvm/lib/CodeGen/InterleavedAccessPass.cpp | 75 |
1 files changed, 56 insertions, 19 deletions
diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp index 04d89d6..960c795 100644 --- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp +++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp @@ -571,6 +571,25 @@ bool InterleavedAccessImpl::lowerInterleavedStore( return true; } +static unsigned getIntrinsicFactor(const IntrinsicInst *II) { + switch (II->getIntrinsicID()) { + case Intrinsic::vector_deinterleave2: + case Intrinsic::vector_interleave2: + return 2; + case Intrinsic::vector_deinterleave3: + case Intrinsic::vector_interleave3: + return 3; + case Intrinsic::vector_deinterleave5: + case Intrinsic::vector_interleave5: + return 5; + case Intrinsic::vector_deinterleave7: + case Intrinsic::vector_interleave7: + return 7; + default: + llvm_unreachable("Unexpected intrinsic"); + } +} + // For an (de)interleave tree like this: // // A C B D @@ -586,7 +605,7 @@ bool InterleavedAccessImpl::lowerInterleavedStore( // to reorder them by interleaving these values. static void interleaveLeafValues(MutableArrayRef<Value *> SubLeaves) { unsigned NumLeaves = SubLeaves.size(); - if (NumLeaves == 2) + if (NumLeaves == 2 || !isPowerOf2_64(NumLeaves)) return; assert(isPowerOf2_32(NumLeaves) && NumLeaves > 1); @@ -608,7 +627,10 @@ static void interleaveLeafValues(MutableArrayRef<Value *> SubLeaves) { static bool getVectorInterleaveFactor(IntrinsicInst *II, SmallVectorImpl<Value *> &Operands, SmallVectorImpl<Instruction *> &DeadInsts) { - assert(II->getIntrinsicID() == Intrinsic::vector_interleave2); + assert(II->getIntrinsicID() == Intrinsic::vector_interleave2 || + II->getIntrinsicID() == Intrinsic::vector_interleave3 || + II->getIntrinsicID() == Intrinsic::vector_interleave5 || + II->getIntrinsicID() == Intrinsic::vector_interleave7); // Visit with BFS SmallVector<IntrinsicInst *, 8> Queue; @@ -620,7 +642,7 @@ getVectorInterleaveFactor(IntrinsicInst *II, SmallVectorImpl<Value *> &Operands, // All the intermediate intrinsics will be deleted. DeadInsts.push_back(Current); - for (unsigned I = 0; I < 2; ++I) { + for (unsigned I = 0; I < getIntrinsicFactor(Current); ++I) { Value *Op = Current->getOperand(I); if (auto *OpII = dyn_cast<IntrinsicInst>(Op)) if (OpII->getIntrinsicID() == Intrinsic::vector_interleave2) { @@ -638,9 +660,10 @@ getVectorInterleaveFactor(IntrinsicInst *II, SmallVectorImpl<Value *> &Operands, } const unsigned Factor = Operands.size(); - // Currently we only recognize power-of-two factors. + // Currently we only recognize factors of 3, 5, 7, and powers of 2. // FIXME: should we assert here instead? - if (Factor <= 1 || !isPowerOf2_32(Factor)) + if (Factor <= 1 || + (!isPowerOf2_32(Factor) && Factor != getIntrinsicFactor(II))) return false; interleaveLeafValues(Operands); @@ -651,9 +674,12 @@ static bool getVectorDeinterleaveFactor(IntrinsicInst *II, SmallVectorImpl<Value *> &Results, SmallVectorImpl<Instruction *> &DeadInsts) { - assert(II->getIntrinsicID() == Intrinsic::vector_deinterleave2); + assert(II->getIntrinsicID() == Intrinsic::vector_deinterleave2 || + II->getIntrinsicID() == Intrinsic::vector_deinterleave3 || + II->getIntrinsicID() == Intrinsic::vector_deinterleave5 || + II->getIntrinsicID() == Intrinsic::vector_deinterleave7); using namespace PatternMatch; - if (!II->hasNUses(2)) + if (!II->hasNUses(getIntrinsicFactor(II))) return false; // Visit with BFS @@ -662,12 +688,12 @@ getVectorDeinterleaveFactor(IntrinsicInst *II, while (!Queue.empty()) { IntrinsicInst *Current = Queue.front(); Queue.erase(Queue.begin()); - assert(Current->hasNUses(2)); + assert(Current->hasNUses(getIntrinsicFactor(Current))); // All the intermediate intrinsics will be deleted from the bottom-up. DeadInsts.insert(DeadInsts.begin(), Current); - ExtractValueInst *LHS = nullptr, *RHS = nullptr; + SmallVector<ExtractValueInst *> EVs(getIntrinsicFactor(Current), nullptr); for (User *Usr : Current->users()) { if (!isa<ExtractValueInst>(Usr)) return 0; @@ -679,17 +705,15 @@ getVectorDeinterleaveFactor(IntrinsicInst *II, if (Indices.size() != 1) return false; - if (Indices[0] == 0 && !LHS) - LHS = EV; - else if (Indices[0] == 1 && !RHS) - RHS = EV; + if (!EVs[Indices[0]]) + EVs[Indices[0]] = EV; else return false; } // We have legal indices. At this point we're either going // to continue the traversal or push the leaf values into Results. - for (ExtractValueInst *EV : {LHS, RHS}) { + for (ExtractValueInst *EV : EVs) { // Continue the traversal. We're playing safe here and matching only the // expression consisting of a perfectly balanced binary tree in which all // intermediate values are only used once. @@ -713,9 +737,10 @@ getVectorDeinterleaveFactor(IntrinsicInst *II, } const unsigned Factor = Results.size(); - // Currently we only recognize power-of-two factors. + // Currently we only recognize factors of 3, 5, 7, and powers of 2. // FIXME: should we assert here instead? - if (Factor <= 1 || !isPowerOf2_32(Factor)) + if (Factor <= 1 || + (!isPowerOf2_32(Factor) && Factor != getIntrinsicFactor(II))) return 0; interleaveLeafValues(Results); @@ -878,11 +903,23 @@ bool InterleavedAccessImpl::runOnFunction(Function &F) { if (auto *II = dyn_cast<IntrinsicInst>(&I)) { // At present, we only have intrinsics to represent (de)interleaving - // with a factor of 2. - if (II->getIntrinsicID() == Intrinsic::vector_deinterleave2) + // with a factor of 2,3,5 and 7. + switch (II->getIntrinsicID()) { + case Intrinsic::vector_deinterleave2: + case Intrinsic::vector_deinterleave3: + case Intrinsic::vector_deinterleave5: + case Intrinsic::vector_deinterleave7: Changed |= lowerDeinterleaveIntrinsic(II, DeadInsts); - else if (II->getIntrinsicID() == Intrinsic::vector_interleave2) + break; + case Intrinsic::vector_interleave2: + case Intrinsic::vector_interleave3: + case Intrinsic::vector_interleave5: + case Intrinsic::vector_interleave7: Changed |= lowerInterleaveIntrinsic(II, DeadInsts); + break; + default: + break; + } } } |