diff options
author | Luke Lau <luke@igalia.com> | 2025-05-28 11:44:41 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-05-28 11:44:41 +0100 |
commit | 6d88343662c25e2b06974da9987d52f23a120b51 (patch) | |
tree | c81278188f57286e7ebb732b822786b66a5566d6 /llvm/lib/CodeGen | |
parent | 5ab944a8c6a213beb96f3747a441b02e497732e4 (diff) | |
download | llvm-6d88343662c25e2b06974da9987d52f23a120b51.zip llvm-6d88343662c25e2b06974da9987d52f23a120b51.tar.gz llvm-6d88343662c25e2b06974da9987d52f23a120b51.tar.bz2 |
[IA] Add support for [de]interleave{4,6,8} (#141512)
This teaches the interleaved access pass to the lower the intrinsics for
factors 4,6 and 8 added in #139893 to target intrinsics.
Because factors 4 and 8 could either have been recursively
[de]interleaved or have just been a single intrinsic, we need to check
that it's the former it before reshuffling around the values via
interleaveLeafValues.
After this patch, we can teach the loop vectorizer to emit a single
interleave intrinsic for factors 2 through to 8, and then we can remove
the recursive interleaving matching in interleaved access pass.
Diffstat (limited to 'llvm/lib/CodeGen')
-rw-r--r-- | llvm/lib/CodeGen/InterleavedAccessPass.cpp | 88 |
1 files changed, 57 insertions, 31 deletions
diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp index 960c795..49f1504 100644 --- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp +++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp @@ -571,6 +571,36 @@ bool InterleavedAccessImpl::lowerInterleavedStore( return true; } +static bool isInterleaveIntrinsic(Intrinsic::ID IID) { + switch (IID) { + case Intrinsic::vector_interleave2: + case Intrinsic::vector_interleave3: + case Intrinsic::vector_interleave4: + case Intrinsic::vector_interleave5: + case Intrinsic::vector_interleave6: + case Intrinsic::vector_interleave7: + case Intrinsic::vector_interleave8: + return true; + default: + return false; + } +} + +static bool isDeinterleaveIntrinsic(Intrinsic::ID IID) { + switch (IID) { + case Intrinsic::vector_deinterleave2: + case Intrinsic::vector_deinterleave3: + case Intrinsic::vector_deinterleave4: + case Intrinsic::vector_deinterleave5: + case Intrinsic::vector_deinterleave6: + case Intrinsic::vector_deinterleave7: + case Intrinsic::vector_deinterleave8: + return true; + default: + return false; + } +} + static unsigned getIntrinsicFactor(const IntrinsicInst *II) { switch (II->getIntrinsicID()) { case Intrinsic::vector_deinterleave2: @@ -579,12 +609,21 @@ static unsigned getIntrinsicFactor(const IntrinsicInst *II) { case Intrinsic::vector_deinterleave3: case Intrinsic::vector_interleave3: return 3; + case Intrinsic::vector_deinterleave4: + case Intrinsic::vector_interleave4: + return 4; case Intrinsic::vector_deinterleave5: case Intrinsic::vector_interleave5: return 5; + case Intrinsic::vector_deinterleave6: + case Intrinsic::vector_interleave6: + return 6; case Intrinsic::vector_deinterleave7: case Intrinsic::vector_interleave7: return 7; + case Intrinsic::vector_deinterleave8: + case Intrinsic::vector_interleave8: + return 8; default: llvm_unreachable("Unexpected intrinsic"); } @@ -605,10 +644,9 @@ static unsigned getIntrinsicFactor(const IntrinsicInst *II) { // to reorder them by interleaving these values. static void interleaveLeafValues(MutableArrayRef<Value *> SubLeaves) { unsigned NumLeaves = SubLeaves.size(); - if (NumLeaves == 2 || !isPowerOf2_64(NumLeaves)) - return; - assert(isPowerOf2_32(NumLeaves) && NumLeaves > 1); + if (NumLeaves == 2) + return; const unsigned HalfLeaves = NumLeaves / 2; // Visit the sub-trees. @@ -627,10 +665,7 @@ static void interleaveLeafValues(MutableArrayRef<Value *> SubLeaves) { static bool getVectorInterleaveFactor(IntrinsicInst *II, SmallVectorImpl<Value *> &Operands, SmallVectorImpl<Instruction *> &DeadInsts) { - assert(II->getIntrinsicID() == Intrinsic::vector_interleave2 || - II->getIntrinsicID() == Intrinsic::vector_interleave3 || - II->getIntrinsicID() == Intrinsic::vector_interleave5 || - II->getIntrinsicID() == Intrinsic::vector_interleave7); + assert(isInterleaveIntrinsic(II->getIntrinsicID())); // Visit with BFS SmallVector<IntrinsicInst *, 8> Queue; @@ -660,13 +695,17 @@ getVectorInterleaveFactor(IntrinsicInst *II, SmallVectorImpl<Value *> &Operands, } const unsigned Factor = Operands.size(); - // Currently we only recognize factors of 3, 5, 7, and powers of 2. + // Currently we only recognize factors 2...8 and other powers of 2. // FIXME: should we assert here instead? if (Factor <= 1 || (!isPowerOf2_32(Factor) && Factor != getIntrinsicFactor(II))) return false; - interleaveLeafValues(Operands); + // Recursively interleaved factors need to have their values reordered + // TODO: Remove once the loop vectorizer no longer recursively interleaves + // factors 4 + 8 + if (isPowerOf2_32(Factor) && getIntrinsicFactor(II) == 2) + interleaveLeafValues(Operands); return true; } @@ -674,10 +713,7 @@ static bool getVectorDeinterleaveFactor(IntrinsicInst *II, SmallVectorImpl<Value *> &Results, SmallVectorImpl<Instruction *> &DeadInsts) { - assert(II->getIntrinsicID() == Intrinsic::vector_deinterleave2 || - II->getIntrinsicID() == Intrinsic::vector_deinterleave3 || - II->getIntrinsicID() == Intrinsic::vector_deinterleave5 || - II->getIntrinsicID() == Intrinsic::vector_deinterleave7); + assert(isDeinterleaveIntrinsic(II->getIntrinsicID())); using namespace PatternMatch; if (!II->hasNUses(getIntrinsicFactor(II))) return false; @@ -737,13 +773,17 @@ getVectorDeinterleaveFactor(IntrinsicInst *II, } const unsigned Factor = Results.size(); - // Currently we only recognize factors of 3, 5, 7, and powers of 2. + // Currently we only recognize factors of 2...8 and other powers of 2. // FIXME: should we assert here instead? if (Factor <= 1 || (!isPowerOf2_32(Factor) && Factor != getIntrinsicFactor(II))) return 0; - interleaveLeafValues(Results); + // Recursively interleaved factors need to have their values reordered + // TODO: Remove once the loop vectorizer no longer recursively interleaves + // factors 4 + 8 + if (isPowerOf2_32(Factor) && getIntrinsicFactor(II) == 2) + interleaveLeafValues(Results); return true; } @@ -902,24 +942,10 @@ bool InterleavedAccessImpl::runOnFunction(Function &F) { Changed |= lowerInterleavedStore(&I, DeadInsts); if (auto *II = dyn_cast<IntrinsicInst>(&I)) { - // At present, we only have intrinsics to represent (de)interleaving - // with a factor of 2,3,5 and 7. - switch (II->getIntrinsicID()) { - case Intrinsic::vector_deinterleave2: - case Intrinsic::vector_deinterleave3: - case Intrinsic::vector_deinterleave5: - case Intrinsic::vector_deinterleave7: + if (isDeinterleaveIntrinsic(II->getIntrinsicID())) Changed |= lowerDeinterleaveIntrinsic(II, DeadInsts); - break; - case Intrinsic::vector_interleave2: - case Intrinsic::vector_interleave3: - case Intrinsic::vector_interleave5: - case Intrinsic::vector_interleave7: + else if (isInterleaveIntrinsic(II->getIntrinsicID())) Changed |= lowerInterleaveIntrinsic(II, DeadInsts); - break; - default: - break; - } } } |