aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Analysis/VectorUtils.cpp
diff options
context:
space:
mode:
authorKerry McLaughlin <kerry.mclaughlin@arm.com>2025-09-01 13:53:30 +0100
committerGitHub <noreply@github.com>2025-09-01 13:53:30 +0100
commitf0e9bba024d44b55d54b02025623ce4a3ba5a37c (patch)
tree38683bafa768fa6424f372eca65628df21c4d5a4 /llvm/lib/Analysis/VectorUtils.cpp
parent5f412415645edb0d735dbc58306e9deb6ab0bcd4 (diff)
downloadllvm-f0e9bba024d44b55d54b02025623ce4a3ba5a37c.zip
llvm-f0e9bba024d44b55d54b02025623ce4a3ba5a37c.tar.gz
llvm-f0e9bba024d44b55d54b02025623ce4a3ba5a37c.tar.bz2
[LoopVectorize] Generate wide active lane masks (#147535)
This patch adds a new flag (-enable-wide-lane-mask) which allows LoopVectorize to generate wider-than-VF active lane masks when it is safe to do so (i.e. the mask is used for data and control flow). The transform in extractFromWideActiveLaneMask creates vector extracts from the first active lane mask in the header & loop body, modifying the active lane mask phi operands to use the extracts. An additional operand is passed to the ActiveLaneMask instruction, the value of which is used as a multiplier of VF when generating the mask. By default this is 1, and is updated to UF by extractFromWideActiveLaneMask. The motivation for this change is to improve interleaved loops when SVE2.1 is available, where we can make use of the whilelo instruction which returns a predicate pair. This is based on a PR that was created by @momchil-velikov (#81140) and contains tests which were added there.
Diffstat (limited to 'llvm/lib/Analysis/VectorUtils.cpp')
-rw-r--r--llvm/lib/Analysis/VectorUtils.cpp2
1 files changed, 2 insertions, 0 deletions
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index 425ea31..091d948 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -166,6 +166,7 @@ bool llvm::isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
case Intrinsic::is_fpclass:
case Intrinsic::vp_is_fpclass:
case Intrinsic::powi:
+ case Intrinsic::vector_extract:
return (ScalarOpdIdx == 1);
case Intrinsic::smul_fix:
case Intrinsic::smul_fix_sat:
@@ -200,6 +201,7 @@ bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(
case Intrinsic::vp_llrint:
case Intrinsic::ucmp:
case Intrinsic::scmp:
+ case Intrinsic::vector_extract:
return OpdIdx == -1 || OpdIdx == 0;
case Intrinsic::modf:
case Intrinsic::sincos: