aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/CodeGen/InterleavedAccessPass.cpp
diff options
context:
space:
mode:
authorLuke Lau <luke@igalia.com>2025-05-22 04:32:47 +0100
committerGitHub <noreply@github.com>2025-05-22 04:32:47 +0100
commit09c3d1432b15203762b6ccd8f2faec17b5c61c8c (patch)
tree6c3d72687f5a58f671e078ef056d4e7db93f14e9 /llvm/lib/CodeGen/InterleavedAccessPass.cpp
parent0641ca1cd21d86f081157a33cd5e1aa2b1bd2d04 (diff)
downloadllvm-09c3d1432b15203762b6ccd8f2faec17b5c61c8c.zip
llvm-09c3d1432b15203762b6ccd8f2faec17b5c61c8c.tar.gz
llvm-09c3d1432b15203762b6ccd8f2faec17b5c61c8c.tar.bz2
[IA] Add support for [de]interleave{3,5,7} (#139373)
This adds support for lowering deinterleave and interleave intrinsics for factors 3 5 and 7 into target specific memory intrinsics. Notably this doesn't add support for handling higher factors constructed from interleaving interleave intrinsics, e.g. factor 6 from interleave3 + interleave2. I initially tried this but it became very complex very quickly. For example, because there's now multiple factors involved interleaveLeafValues is no longer symmetric between interleaving and deinterleaving. There's then also two ways of representing a factor 6 deinterleave: It can both be done as either 1 deinterleave3 and 3 deinterleave2s OR 1 deinterleave2 and 3 deinterleave3s. I'm not sure the complexity of supporting arbitrary factors is warranted given how we only need to support a small number of factors currently: SVE only needs factors 2,3,4 whilst RVV only needs 2,3,4,5,6,7,8. My preference would be to just add a interleave6 and deinterleave6 intrinsic to avoid all this ambiguity, but I'll defer this discussion to a later patch.
Diffstat (limited to 'llvm/lib/CodeGen/InterleavedAccessPass.cpp')
-rw-r--r--llvm/lib/CodeGen/InterleavedAccessPass.cpp75
1 files changed, 56 insertions, 19 deletions
diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index 04d89d6..960c795 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -571,6 +571,25 @@ bool InterleavedAccessImpl::lowerInterleavedStore(
return true;
}
+static unsigned getIntrinsicFactor(const IntrinsicInst *II) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::vector_deinterleave2:
+ case Intrinsic::vector_interleave2:
+ return 2;
+ case Intrinsic::vector_deinterleave3:
+ case Intrinsic::vector_interleave3:
+ return 3;
+ case Intrinsic::vector_deinterleave5:
+ case Intrinsic::vector_interleave5:
+ return 5;
+ case Intrinsic::vector_deinterleave7:
+ case Intrinsic::vector_interleave7:
+ return 7;
+ default:
+ llvm_unreachable("Unexpected intrinsic");
+ }
+}
+
// For an (de)interleave tree like this:
//
// A C B D
@@ -586,7 +605,7 @@ bool InterleavedAccessImpl::lowerInterleavedStore(
// to reorder them by interleaving these values.
static void interleaveLeafValues(MutableArrayRef<Value *> SubLeaves) {
unsigned NumLeaves = SubLeaves.size();
- if (NumLeaves == 2)
+ if (NumLeaves == 2 || !isPowerOf2_64(NumLeaves))
return;
assert(isPowerOf2_32(NumLeaves) && NumLeaves > 1);
@@ -608,7 +627,10 @@ static void interleaveLeafValues(MutableArrayRef<Value *> SubLeaves) {
static bool
getVectorInterleaveFactor(IntrinsicInst *II, SmallVectorImpl<Value *> &Operands,
SmallVectorImpl<Instruction *> &DeadInsts) {
- assert(II->getIntrinsicID() == Intrinsic::vector_interleave2);
+ assert(II->getIntrinsicID() == Intrinsic::vector_interleave2 ||
+ II->getIntrinsicID() == Intrinsic::vector_interleave3 ||
+ II->getIntrinsicID() == Intrinsic::vector_interleave5 ||
+ II->getIntrinsicID() == Intrinsic::vector_interleave7);
// Visit with BFS
SmallVector<IntrinsicInst *, 8> Queue;
@@ -620,7 +642,7 @@ getVectorInterleaveFactor(IntrinsicInst *II, SmallVectorImpl<Value *> &Operands,
// All the intermediate intrinsics will be deleted.
DeadInsts.push_back(Current);
- for (unsigned I = 0; I < 2; ++I) {
+ for (unsigned I = 0; I < getIntrinsicFactor(Current); ++I) {
Value *Op = Current->getOperand(I);
if (auto *OpII = dyn_cast<IntrinsicInst>(Op))
if (OpII->getIntrinsicID() == Intrinsic::vector_interleave2) {
@@ -638,9 +660,10 @@ getVectorInterleaveFactor(IntrinsicInst *II, SmallVectorImpl<Value *> &Operands,
}
const unsigned Factor = Operands.size();
- // Currently we only recognize power-of-two factors.
+ // Currently we only recognize factors of 3, 5, 7, and powers of 2.
// FIXME: should we assert here instead?
- if (Factor <= 1 || !isPowerOf2_32(Factor))
+ if (Factor <= 1 ||
+ (!isPowerOf2_32(Factor) && Factor != getIntrinsicFactor(II)))
return false;
interleaveLeafValues(Operands);
@@ -651,9 +674,12 @@ static bool
getVectorDeinterleaveFactor(IntrinsicInst *II,
SmallVectorImpl<Value *> &Results,
SmallVectorImpl<Instruction *> &DeadInsts) {
- assert(II->getIntrinsicID() == Intrinsic::vector_deinterleave2);
+ assert(II->getIntrinsicID() == Intrinsic::vector_deinterleave2 ||
+ II->getIntrinsicID() == Intrinsic::vector_deinterleave3 ||
+ II->getIntrinsicID() == Intrinsic::vector_deinterleave5 ||
+ II->getIntrinsicID() == Intrinsic::vector_deinterleave7);
using namespace PatternMatch;
- if (!II->hasNUses(2))
+ if (!II->hasNUses(getIntrinsicFactor(II)))
return false;
// Visit with BFS
@@ -662,12 +688,12 @@ getVectorDeinterleaveFactor(IntrinsicInst *II,
while (!Queue.empty()) {
IntrinsicInst *Current = Queue.front();
Queue.erase(Queue.begin());
- assert(Current->hasNUses(2));
+ assert(Current->hasNUses(getIntrinsicFactor(Current)));
// All the intermediate intrinsics will be deleted from the bottom-up.
DeadInsts.insert(DeadInsts.begin(), Current);
- ExtractValueInst *LHS = nullptr, *RHS = nullptr;
+ SmallVector<ExtractValueInst *> EVs(getIntrinsicFactor(Current), nullptr);
for (User *Usr : Current->users()) {
if (!isa<ExtractValueInst>(Usr))
return 0;
@@ -679,17 +705,15 @@ getVectorDeinterleaveFactor(IntrinsicInst *II,
if (Indices.size() != 1)
return false;
- if (Indices[0] == 0 && !LHS)
- LHS = EV;
- else if (Indices[0] == 1 && !RHS)
- RHS = EV;
+ if (!EVs[Indices[0]])
+ EVs[Indices[0]] = EV;
else
return false;
}
// We have legal indices. At this point we're either going
// to continue the traversal or push the leaf values into Results.
- for (ExtractValueInst *EV : {LHS, RHS}) {
+ for (ExtractValueInst *EV : EVs) {
// Continue the traversal. We're playing safe here and matching only the
// expression consisting of a perfectly balanced binary tree in which all
// intermediate values are only used once.
@@ -713,9 +737,10 @@ getVectorDeinterleaveFactor(IntrinsicInst *II,
}
const unsigned Factor = Results.size();
- // Currently we only recognize power-of-two factors.
+ // Currently we only recognize factors of 3, 5, 7, and powers of 2.
// FIXME: should we assert here instead?
- if (Factor <= 1 || !isPowerOf2_32(Factor))
+ if (Factor <= 1 ||
+ (!isPowerOf2_32(Factor) && Factor != getIntrinsicFactor(II)))
return 0;
interleaveLeafValues(Results);
@@ -878,11 +903,23 @@ bool InterleavedAccessImpl::runOnFunction(Function &F) {
if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
// At present, we only have intrinsics to represent (de)interleaving
- // with a factor of 2.
- if (II->getIntrinsicID() == Intrinsic::vector_deinterleave2)
+ // with a factor of 2,3,5 and 7.
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::vector_deinterleave2:
+ case Intrinsic::vector_deinterleave3:
+ case Intrinsic::vector_deinterleave5:
+ case Intrinsic::vector_deinterleave7:
Changed |= lowerDeinterleaveIntrinsic(II, DeadInsts);
- else if (II->getIntrinsicID() == Intrinsic::vector_interleave2)
+ break;
+ case Intrinsic::vector_interleave2:
+ case Intrinsic::vector_interleave3:
+ case Intrinsic::vector_interleave5:
+ case Intrinsic::vector_interleave7:
Changed |= lowerInterleaveIntrinsic(II, DeadInsts);
+ break;
+ default:
+ break;
+ }
}
}