aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/CodeGen/InterleavedAccessPass.cpp
diff options
context:
space:
mode:
authorMin-Yih Hsu <min.hsu@sifive.com>2025-01-23 15:27:51 -0800
committerGitHub <noreply@github.com>2025-01-23 15:27:51 -0800
commitbc74a1edbe5e6a3603e65efe06116fa72747acab (patch)
tree430a19dc2b4721361f6a93f167be1d062985fdd6 /llvm/lib/CodeGen/InterleavedAccessPass.cpp
parent10772807ab72ce2b68d76816f8753219b2acbac3 (diff)
downloadllvm-bc74a1edbe5e6a3603e65efe06116fa72747acab.zip
llvm-bc74a1edbe5e6a3603e65efe06116fa72747acab.tar.gz
llvm-bc74a1edbe5e6a3603e65efe06116fa72747acab.tar.bz2
[IA] Generalize the support for power-of-two (de)interleave intrinsics (#123863)
Previously, AArch64 used pattern matching to support llvm.vector.(de)interleave of 2 and 4; RISC-V only supported (de)interleave of 2. This patch consolidates the logics in these two targets by factoring out the common factor calculations into the InterleaveAccess Pass.
Diffstat (limited to 'llvm/lib/CodeGen/InterleavedAccessPass.cpp')
-rw-r--r--llvm/lib/CodeGen/InterleavedAccessPass.cpp177
1 files changed, 169 insertions, 8 deletions
diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index c6d5533..3f6a69e 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -60,6 +60,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
@@ -478,6 +479,157 @@ bool InterleavedAccessImpl::lowerInterleavedStore(
return true;
}
+// For an (de)interleave tree like this:
+//
+// A C B D
+// |___| |___|
+// |_____|
+// |
+// A B C D
+//
+// We will get ABCD at the end while the leaf operands/results
+// are ACBD, which are also what we initially collected in
+// getVectorInterleaveFactor / getVectorDeinterleaveFactor. But TLI
+// hooks (e.g. lowerDeinterleaveIntrinsicToLoad) expect ABCD, so we need
+// to reorder them by interleaving these values.
+static void interleaveLeafValues(MutableArrayRef<Value *> SubLeaves) {
+ unsigned NumLeaves = SubLeaves.size();
+ if (NumLeaves == 2)
+ return;
+
+ assert(isPowerOf2_32(NumLeaves) && NumLeaves > 1);
+
+ const unsigned HalfLeaves = NumLeaves / 2;
+ // Visit the sub-trees.
+ interleaveLeafValues(SubLeaves.take_front(HalfLeaves));
+ interleaveLeafValues(SubLeaves.drop_front(HalfLeaves));
+
+ SmallVector<Value *, 8> Buffer;
+ // a0 a1 a2 a3 b0 b1 b2 b3
+ // -> a0 b0 a1 b1 a2 b2 a3 b3
+ for (unsigned i = 0U; i < NumLeaves; ++i)
+ Buffer.push_back(SubLeaves[i / 2 + (i % 2 ? HalfLeaves : 0)]);
+
+ llvm::copy(Buffer, SubLeaves.begin());
+}
+
+static bool
+getVectorInterleaveFactor(IntrinsicInst *II, SmallVectorImpl<Value *> &Operands,
+ SmallVectorImpl<Instruction *> &DeadInsts) {
+ assert(II->getIntrinsicID() == Intrinsic::vector_interleave2);
+
+ // Visit with BFS
+ SmallVector<IntrinsicInst *, 8> Queue;
+ Queue.push_back(II);
+ while (!Queue.empty()) {
+ IntrinsicInst *Current = Queue.front();
+ Queue.erase(Queue.begin());
+
+ // All the intermediate intrinsics will be deleted.
+ DeadInsts.push_back(Current);
+
+ for (unsigned I = 0; I < 2; ++I) {
+ Value *Op = Current->getOperand(I);
+ if (auto *OpII = dyn_cast<IntrinsicInst>(Op))
+ if (OpII->getIntrinsicID() == Intrinsic::vector_interleave2) {
+ Queue.push_back(OpII);
+ continue;
+ }
+
+ // If this is not a perfectly balanced tree, the leaf
+ // result types would be different.
+ if (!Operands.empty() && Op->getType() != Operands.back()->getType())
+ return false;
+
+ Operands.push_back(Op);
+ }
+ }
+
+ const unsigned Factor = Operands.size();
+ // Currently we only recognize power-of-two factors.
+ // FIXME: should we assert here instead?
+ if (Factor <= 1 || !isPowerOf2_32(Factor))
+ return false;
+
+ interleaveLeafValues(Operands);
+ return true;
+}
+
+static bool
+getVectorDeinterleaveFactor(IntrinsicInst *II,
+ SmallVectorImpl<Value *> &Results,
+ SmallVectorImpl<Instruction *> &DeadInsts) {
+ assert(II->getIntrinsicID() == Intrinsic::vector_deinterleave2);
+ using namespace PatternMatch;
+ if (!II->hasNUses(2))
+ return false;
+
+ // Visit with BFS
+ SmallVector<IntrinsicInst *, 8> Queue;
+ Queue.push_back(II);
+ while (!Queue.empty()) {
+ IntrinsicInst *Current = Queue.front();
+ Queue.erase(Queue.begin());
+ assert(Current->hasNUses(2));
+
+ // All the intermediate intrinsics will be deleted from the bottom-up.
+ DeadInsts.insert(DeadInsts.begin(), Current);
+
+ ExtractValueInst *LHS = nullptr, *RHS = nullptr;
+ for (User *Usr : Current->users()) {
+ if (!isa<ExtractValueInst>(Usr))
+ return 0;
+
+ auto *EV = cast<ExtractValueInst>(Usr);
+ // Intermediate ExtractValue instructions will also be deleted.
+ DeadInsts.insert(DeadInsts.begin(), EV);
+ ArrayRef<unsigned> Indices = EV->getIndices();
+ if (Indices.size() != 1)
+ return false;
+
+ if (Indices[0] == 0 && !LHS)
+ LHS = EV;
+ else if (Indices[0] == 1 && !RHS)
+ RHS = EV;
+ else
+ return false;
+ }
+
+ // We have legal indices. At this point we're either going
+ // to continue the traversal or push the leaf values into Results.
+ for (ExtractValueInst *EV : {LHS, RHS}) {
+ // Continue the traversal. We're playing safe here and matching only the
+ // expression consisting of a perfectly balanced binary tree in which all
+ // intermediate values are only used once.
+ if (EV->hasOneUse() &&
+ match(EV->user_back(),
+ m_Intrinsic<Intrinsic::vector_deinterleave2>()) &&
+ EV->user_back()->hasNUses(2)) {
+ auto *EVUsr = cast<IntrinsicInst>(EV->user_back());
+ Queue.push_back(EVUsr);
+ continue;
+ }
+
+ // If this is not a perfectly balanced tree, the leaf
+ // result types would be different.
+ if (!Results.empty() && EV->getType() != Results.back()->getType())
+ return false;
+
+ // Save the leaf value.
+ Results.push_back(EV);
+ }
+ }
+
+ const unsigned Factor = Results.size();
+ // Currently we only recognize power-of-two factors.
+ // FIXME: should we assert here instead?
+ if (Factor <= 1 || !isPowerOf2_32(Factor))
+ return 0;
+
+ interleaveLeafValues(Results);
+ return true;
+}
+
bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
IntrinsicInst *DI, SmallSetVector<Instruction *, 32> &DeadInsts) {
LoadInst *LI = dyn_cast<LoadInst>(DI->getOperand(0));
@@ -485,16 +637,21 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
if (!LI || !LI->hasOneUse() || !LI->isSimple())
return false;
- LLVM_DEBUG(dbgs() << "IA: Found a deinterleave intrinsic: " << *DI << "\n");
+ SmallVector<Value *, 8> DeinterleaveValues;
+ SmallVector<Instruction *, 8> DeinterleaveDeadInsts;
+ if (!getVectorDeinterleaveFactor(DI, DeinterleaveValues,
+ DeinterleaveDeadInsts))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "IA: Found a deinterleave intrinsic: " << *DI
+ << " with factor = " << DeinterleaveValues.size() << "\n");
// Try and match this with target specific intrinsics.
- SmallVector<Instruction *, 4> DeinterleaveDeadInsts;
- if (!TLI->lowerDeinterleaveIntrinsicToLoad(DI, LI, DeinterleaveDeadInsts))
+ if (!TLI->lowerDeinterleaveIntrinsicToLoad(LI, DeinterleaveValues))
return false;
DeadInsts.insert(DeinterleaveDeadInsts.begin(), DeinterleaveDeadInsts.end());
// We now have a target-specific load, so delete the old one.
- DeadInsts.insert(DI);
DeadInsts.insert(LI);
return true;
}
@@ -509,16 +666,20 @@ bool InterleavedAccessImpl::lowerInterleaveIntrinsic(
if (!SI || !SI->isSimple())
return false;
- LLVM_DEBUG(dbgs() << "IA: Found an interleave intrinsic: " << *II << "\n");
+ SmallVector<Value *, 8> InterleaveValues;
+ SmallVector<Instruction *, 8> InterleaveDeadInsts;
+ if (!getVectorInterleaveFactor(II, InterleaveValues, InterleaveDeadInsts))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "IA: Found an interleave intrinsic: " << *II
+ << " with factor = " << InterleaveValues.size() << "\n");
- SmallVector<Instruction *, 4> InterleaveDeadInsts;
// Try and match this with target specific intrinsics.
- if (!TLI->lowerInterleaveIntrinsicToStore(II, SI, InterleaveDeadInsts))
+ if (!TLI->lowerInterleaveIntrinsicToStore(SI, InterleaveValues))
return false;
// We now have a target-specific store, so delete the old one.
DeadInsts.insert(SI);
- DeadInsts.insert(II);
DeadInsts.insert(InterleaveDeadInsts.begin(), InterleaveDeadInsts.end());
return true;
}