author     Dinar Temirbulatov <Dinar.Temirbulatov@arm.com>  2024-04-19 10:48:27 +0100
committer  GitHub <noreply@github.com>  2024-04-19 10:48:27 +0100
commit     6f26867cfa4c1333e69a17f8a2fc8297a4ab6d45 (patch)
tree       f35620f97b3d89bbf183fa5f03a35703ae31af4b
parent     054b1b3b5ac67385220654df12732346b51c8a41 (diff)
[AArch64][SVE2] Generate SVE2 BSL instruction in LLVM for add/sub. (#88413)
Allow folding the or(and, and) pattern into the BSL instruction for scalable vectors.
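In short: for any x in two's complement, -x == ~(x - 1), so masks built as sub(0, x) and add(x, -1) are bitwise complements of each other, and the or-of-ands is a bitwise select. A minimal sketch of the shape the combine now matches on scalable vectors (function and value names here are illustrative; the committed test below exercises the same pattern, and the #0 attribute set assumes an SVE2 target):

define <vscale x 4 x i32> @bsl_sketch(<vscale x 4 x i32> %x, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
  %mask = sub <vscale x 4 x i32> zeroinitializer, %x  ; -x
  %inv = add <vscale x 4 x i32> %x, splat(i32 -1)     ; x - 1 == ~(-x)
  %la = and <vscale x 4 x i32> %mask, %a              ; keep %a where -x has set bits
  %rb = and <vscale x 4 x i32> %inv, %b               ; keep %b elsewhere
  %sel = or <vscale x 4 x i32> %la, %rb               ; folds to a single SVE2 bsl
  ret <vscale x 4 x i32> %sel
}

attributes #0 = { "target-features"="+sve2" }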
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp  4
-rw-r--r--  llvm/test/CodeGen/AArch64/sve2-bsl.ll  15
2 files changed, 17 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 7947d73..3d1453e 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17927,11 +17927,11 @@ static SDValue tryCombineToBSL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
     } else
       continue;
 
-    if (!ISD::isBuildVectorAllZeros(Sub.getOperand(0).getNode()))
+    if (!ISD::isConstantSplatVectorAllZeros(Sub.getOperand(0).getNode()))
       continue;
 
     // Constant ones is always righthand operand of the Add.
-    if (!ISD::isBuildVectorAllOnes(Add.getOperand(1).getNode()))
+    if (!ISD::isConstantSplatVectorAllOnes(Add.getOperand(1).getNode()))
       continue;
 
     if (Sub.getOperand(1) != Add.getOperand(0))
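Why the predicate swap above matters: a constant splat over a scalable vector is built as an ISD::SPLAT_VECTOR node rather than an ISD::BUILD_VECTOR, so the old isBuildVectorAllZeros/isBuildVectorAllOnes checks never matched and the combine was skipped for SVE types. The isConstantSplatVector* variants accept both node kinds. A hedged illustration with hypothetical function names:

define <4 x i32> @fixed_width(<4 x i32> %y) {
  ; the all-ones operand becomes an ISD::BUILD_VECTOR: both old and new checks match
  %v = add <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %v
}

define <vscale x 4 x i32> @scalable(<vscale x 4 x i32> %y) {
  ; splat(i32 -1) becomes an ISD::SPLAT_VECTOR: only the isConstantSplatVector* checks match
  %v = add <vscale x 4 x i32> %y, splat(i32 -1)
  ret <vscale x 4 x i32> %v
}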
diff --git a/llvm/test/CodeGen/AArch64/sve2-bsl.ll b/llvm/test/CodeGen/AArch64/sve2-bsl.ll
index 11f6763..23b2622 100644
--- a/llvm/test/CodeGen/AArch64/sve2-bsl.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-bsl.ll
@@ -13,6 +13,21 @@ define <vscale x 4 x i32> @bsl(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
   ret <vscale x 4 x i32> %c
 }
+define <vscale x 4 x i32> @bsl_add_sub(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
+; CHECK-LABEL: bsl_add_sub:
+; CHECK: // %bb.0:
+; CHECK-NEXT: subr z0.s, z0.s, #0 // =0x0
+; CHECK-NEXT: bsl z1.d, z1.d, z2.d, z0.d
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ret
+ %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
+ %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
+ %left_bits_0 = and <vscale x 4 x i32> %neg_cond, %left
+ %right_bits_0 = and <vscale x 4 x i32> %min_cond, %right
+ %bsl0000 = or <vscale x 4 x i32> %right_bits_0, %left_bits_0
+ ret <vscale x 4 x i32> %bsl0000
+}
+
 
 ; we are not expecting bsl instruction here. the constants do not match to fold to bsl.
 define <vscale x 4 x i32> @no_bsl_fold(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: no_bsl_fold: