aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Analysis/VectorUtils.cpp
diff options
context:
space:
mode:
authorLuke Lau <luke@igalia.com>2025-06-12 11:09:09 +0100
committerGitHub <noreply@github.com>2025-06-12 11:09:09 +0100
commit7ef77eb9984d1fb537a409cf4be89560fbb681fe (patch)
tree40f124347a3b2e11fddf20da07bbd4c543c17012 /llvm/lib/Analysis/VectorUtils.cpp
parent5987f1ee5cc59a05961156c04010ab0f3c857628 (diff)
downloadllvm-7ef77eb9984d1fb537a409cf4be89560fbb681fe.zip
llvm-7ef77eb9984d1fb537a409cf4be89560fbb681fe.tar.gz
llvm-7ef77eb9984d1fb537a409cf4be89560fbb681fe.tar.bz2
[LV] Support scalable interleave groups for factors 3,5,6 and 7 (#141865)
Currently the loop vectorizer can only vectorize interleave groups for power-of-2 factors at scalable VFs by recursively interleaving [de]interleave2 intrinsics. However after https://github.com/llvm/llvm-project/pull/124825 and #139893, we now have [de]interleave intrinsics for all factors up to 8, which is enough to support all types of segmented loads and stores on RISC-V. Now that the interleaved access pass has been taught to lower these in #139373 and #141512, this patch teaches the loop vectorizer to emit these intrinsics for factors up to 8, which enables scalable vectorization for non-power-of-2 factors. As far as I'm aware, no in-tree target will vectorize a scalable interelave group above factor 8 because the maximum interleave factor is capped at 4 on AArch64 and 8 on RISC-V, and the `-max-interleave-group-factor` CLI option defaults to 8, so the recursive [de]interleaving code has been removed for now. Factors of 3 with scalable VFs are also turned off in AArch64 since there's no lowering for [de]interleave3 just yet either.
Diffstat (limited to 'llvm/lib/Analysis/VectorUtils.cpp')
-rw-r--r--llvm/lib/Analysis/VectorUtils.cpp24
1 files changed, 24 insertions, 0 deletions
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index 968fd2f..63fccee 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -240,6 +240,30 @@ Intrinsic::ID llvm::getVectorIntrinsicIDForCall(const CallInst *CI,
return Intrinsic::not_intrinsic;
}
+struct InterleaveIntrinsic {
+ Intrinsic::ID Interleave, Deinterleave;
+};
+
+static InterleaveIntrinsic InterleaveIntrinsics[] = {
+ {Intrinsic::vector_interleave2, Intrinsic::vector_deinterleave2},
+ {Intrinsic::vector_interleave3, Intrinsic::vector_deinterleave3},
+ {Intrinsic::vector_interleave4, Intrinsic::vector_deinterleave4},
+ {Intrinsic::vector_interleave5, Intrinsic::vector_deinterleave5},
+ {Intrinsic::vector_interleave6, Intrinsic::vector_deinterleave6},
+ {Intrinsic::vector_interleave7, Intrinsic::vector_deinterleave7},
+ {Intrinsic::vector_interleave8, Intrinsic::vector_deinterleave8},
+};
+
+Intrinsic::ID llvm::getInterleaveIntrinsicID(unsigned Factor) {
+ assert(Factor >= 2 && Factor <= 8 && "Unexpected factor");
+ return InterleaveIntrinsics[Factor - 2].Interleave;
+}
+
+Intrinsic::ID llvm::getDeinterleaveIntrinsicID(unsigned Factor) {
+ assert(Factor >= 2 && Factor <= 8 && "Unexpected factor");
+ return InterleaveIntrinsics[Factor - 2].Deinterleave;
+}
+
/// Given a vector and an element number, see if the scalar value is
/// already around as a register, for example if it were inserted then extracted
/// from the vector.