diff options
author | Dinar Temirbulatov <Dinar.Temirbulatov@arm.com> | 2024-04-19 10:48:27 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-04-19 10:48:27 +0100 |
commit | 6f26867cfa4c1333e69a17f8a2fc8297a4ab6d45 (patch) | |
tree | f35620f97b3d89bbf183fa5f03a35703ae31af4b | |
parent | 054b1b3b5ac67385220654df12732346b51c8a41 (diff) | |
download | llvm-6f26867cfa4c1333e69a17f8a2fc8297a4ab6d45.zip llvm-6f26867cfa4c1333e69a17f8a2fc8297a4ab6d45.tar.gz llvm-6f26867cfa4c1333e69a17f8a2fc8297a4ab6d45.tar.bz2 |
[AArch64][SVE2] Generate SVE2 BSL instruction in LLVM for add/sub. (#88413)
Allow folding the or/and-and pattern into the BSL instruction for scalable vectors.
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/AArch64/sve2-bsl.ll | 15 |
2 files changed, 17 insertions, 2 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 7947d73..3d1453e 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -17927,11 +17927,11 @@ static SDValue tryCombineToBSL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, } else continue; - if (!ISD::isBuildVectorAllZeros(Sub.getOperand(0).getNode())) + if (!ISD::isConstantSplatVectorAllZeros(Sub.getOperand(0).getNode())) continue; // Constant ones is always righthand operand of the Add. - if (!ISD::isBuildVectorAllOnes(Add.getOperand(1).getNode())) + if (!ISD::isConstantSplatVectorAllOnes(Add.getOperand(1).getNode())) continue; if (Sub.getOperand(1) != Add.getOperand(0)) diff --git a/llvm/test/CodeGen/AArch64/sve2-bsl.ll b/llvm/test/CodeGen/AArch64/sve2-bsl.ll index 11f6763..23b2622 100644 --- a/llvm/test/CodeGen/AArch64/sve2-bsl.ll +++ b/llvm/test/CodeGen/AArch64/sve2-bsl.ll @@ -13,6 +13,21 @@ define <vscale x 4 x i32> @bsl(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { ret <vscale x 4 x i32> %c } +define <vscale x 4 x i32> @bsl_add_sub(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 { +; CHECK-LABEL: bsl_add_sub: +; CHECK: // %bb.0: +; CHECK-NEXT: subr z0.s, z0.s, #0 // =0x0 +; CHECK-NEXT: bsl z1.d, z1.d, z2.d, z0.d +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ret + %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond + %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1) + %left_bits_0 = and <vscale x 4 x i32> %neg_cond, %left + %right_bits_0 = and <vscale x 4 x i32> %min_cond, %right + %bsl0000 = or <vscale x 4 x i32> %right_bits_0, %left_bits_0 + ret <vscale x 4 x i32> %bsl0000 +} + ; we are not expecting bsl instruction here. the constants do not match to fold to bsl. define <vscale x 4 x i32> @no_bsl_fold(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { ; CHECK-LABEL: no_bsl_fold: |