diff options
author | Dinar Temirbulatov <Dinar.Temirbulatov@arm.com> | 2024-04-19 10:48:27 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-04-19 10:48:27 +0100 |
commit | 6f26867cfa4c1333e69a17f8a2fc8297a4ab6d45 (patch) | |
tree | f35620f97b3d89bbf183fa5f03a35703ae31af4b | |
parent | 054b1b3b5ac67385220654df12732346b51c8a41 (diff) | |
download | llvm-6f26867cfa4c1333e69a17f8a2fc8297a4ab6d45.zip llvm-6f26867cfa4c1333e69a17f8a2fc8297a4ab6d45.tar.gz llvm-6f26867cfa4c1333e69a17f8a2fc8297a4ab6d45.tar.bz2 |
[AArch64][SVE2] Generate SVE2 BSL instruction in LLVM for add/sub. (#88413)
Allow folding the or/and-and pattern into the BSL instruction for scalable vectors.
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/AArch64/sve2-bsl.ll | 15 |
2 files changed, 17 insertions, 2 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 7947d73..3d1453e 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -17927,11 +17927,11 @@ static SDValue tryCombineToBSL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, } else continue; - if (!ISD::isBuildVectorAllZeros(Sub.getOperand(0).getNode())) + if (!ISD::isConstantSplatVectorAllZeros(Sub.getOperand(0).getNode())) continue; // Constant ones is always righthand operand of the Add. - if (!ISD::isBuildVectorAllOnes(Add.getOperand(1).getNode())) + if (!ISD::isConstantSplatVectorAllOnes(Add.getOperand(1).getNode())) continue; if (Sub.getOperand(1) != Add.getOperand(0)) diff --git a/llvm/test/CodeGen/AArch64/sve2-bsl.ll b/llvm/test/CodeGen/AArch64/sve2-bsl.ll index 11f6763..23b2622 100644 --- a/llvm/test/CodeGen/AArch64/sve2-bsl.ll +++ b/llvm/test/CodeGen/AArch64/sve2-bsl.ll @@ -13,6 +13,21 @@ define <vscale x 4 x i32> @bsl(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { ret <vscale x 4 x i32> %c } +define <vscale x 4 x i32> @bsl_add_sub(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 { +; CHECK-LABEL: bsl_add_sub: +; CHECK: // %bb.0: +; CHECK-NEXT: subr z0.s, z0.s, #0 // =0x0 +; CHECK-NEXT: bsl z1.d, z1.d, z2.d, z0.d +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ret + %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond + %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1) + %left_bits_0 = and <vscale x 4 x i32> %neg_cond, %left + %right_bits_0 = and <vscale x 4 x i32> %min_cond, %right + %bsl0000 = or <vscale x 4 x i32> %right_bits_0, %left_bits_0 + ret <vscale x 4 x i32> %bsl0000 +} + ; we are not expecting bsl instruction here. the constants do not match to fold to bsl. define <vscale x 4 x i32> @no_bsl_fold(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { ; CHECK-LABEL: no_bsl_fold: |