about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Sander de Smalen <sander.desmalen@arm.com> 2024-06-19 14:25:57 +0100
committer: GitHub <noreply@github.com> 2024-06-19 14:25:57 +0100
commit: ca423a26e7bfc31a36c9ad790b0ae1bb9be18836 (patch)
tree: 77100116d72a75741995d3c95856f9228d53bf1d
parent: 93831c73ea51dcf4dc1832a4ea5616b819d36f31 (diff)
download: llvm-ca423a26e7bfc31a36c9ad790b0ae1bb9be18836.zip
llvm-ca423a26e7bfc31a36c9ad790b0ae1bb9be18836.tar.gz
llvm-ca423a26e7bfc31a36c9ad790b0ae1bb9be18836.tar.bz2
[AArch64] Avoid using NEON BSL for streaming[-compatible] functions (#95803)
-rw-r--r-- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp 8
-rw-r--r-- llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll 99
2 files changed, 71 insertions, 36 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 0f0606c..c790209 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -18244,9 +18244,11 @@ static SDValue tryCombineToBSL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
if (!VT.isVector())
return SDValue();
- // The combining code works for NEON, SVE2 and SME.
- if (TLI.useSVEForFixedLengthVectorVT(VT, !Subtarget.isNeonAvailable()) ||
- (VT.isScalableVector() && !Subtarget.hasSVE2()))
+ if (VT.isScalableVector() && !Subtarget.hasSVE2())
+ return SDValue();
+
+ if (VT.isFixedLengthVector() &&
+ (!Subtarget.isNeonAvailable() || TLI.useSVEForFixedLengthVectorVT(VT)))
return SDValue();
SDValue N0 = N->getOperand(0);
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll
index b908dd6..d65e87d 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll
@@ -34,39 +34,72 @@ define <8 x i32> @fixed_bitselect_v8i32(ptr %pre_cond_ptr, ptr %left_ptr, ptr %r
;
; NONEON-NOSVE-LABEL: fixed_bitselect_v8i32:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
-; NONEON-NOSVE-NEXT: ldp q3, q2, [x1]
-; NONEON-NOSVE-NEXT: ldp q5, q4, [x2]
-; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]!
-; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #28]
-; NONEON-NOSVE-NEXT: neg w8, w8
-; NONEON-NOSVE-NEXT: str w8, [sp, #60]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #24]
-; NONEON-NOSVE-NEXT: neg w8, w8
-; NONEON-NOSVE-NEXT: str w8, [sp, #56]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #20]
-; NONEON-NOSVE-NEXT: neg w8, w8
-; NONEON-NOSVE-NEXT: str w8, [sp, #52]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #16]
-; NONEON-NOSVE-NEXT: neg w8, w8
-; NONEON-NOSVE-NEXT: str w8, [sp, #48]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #12]
-; NONEON-NOSVE-NEXT: neg w8, w8
-; NONEON-NOSVE-NEXT: str w8, [sp, #44]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #8]
-; NONEON-NOSVE-NEXT: neg w8, w8
-; NONEON-NOSVE-NEXT: str w8, [sp, #40]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #4]
-; NONEON-NOSVE-NEXT: neg w8, w8
-; NONEON-NOSVE-NEXT: str w8, [sp, #36]
-; NONEON-NOSVE-NEXT: ldr w8, [sp]
-; NONEON-NOSVE-NEXT: neg w8, w8
-; NONEON-NOSVE-NEXT: str w8, [sp, #32]
-; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
-; NONEON-NOSVE-NEXT: bsl v0.16b, v3.16b, v5.16b
-; NONEON-NOSVE-NEXT: bsl v1.16b, v2.16b, v4.16b
-; NONEON-NOSVE-NEXT: add sp, sp, #64
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ldp q2, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q4, q5, [x2]
+; NONEON-NOSVE-NEXT: stp q0, q2, [sp, #-128]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 128
+; NONEON-NOSVE-NEXT: stp q1, q3, [sp, #48]
+; NONEON-NOSVE-NEXT: ldp w8, w14, [sp, #48]
+; NONEON-NOSVE-NEXT: ldp w9, w4, [sp, #64]
+; NONEON-NOSVE-NEXT: ldp w13, w11, [sp, #56]
+; NONEON-NOSVE-NEXT: neg w3, w8
+; NONEON-NOSVE-NEXT: neg w15, w14
+; NONEON-NOSVE-NEXT: str q4, [sp, #32]
+; NONEON-NOSVE-NEXT: and w9, w3, w9
+; NONEON-NOSVE-NEXT: and w15, w15, w4
+; NONEON-NOSVE-NEXT: str q5, [sp, #80]
+; NONEON-NOSVE-NEXT: ldp w5, w3, [sp, #72]
+; NONEON-NOSVE-NEXT: ldp w16, w12, [sp]
+; NONEON-NOSVE-NEXT: neg w4, w11
+; NONEON-NOSVE-NEXT: neg w2, w13
+; NONEON-NOSVE-NEXT: sub w11, w11, #1
+; NONEON-NOSVE-NEXT: and w3, w4, w3
+; NONEON-NOSVE-NEXT: and w2, w2, w5
+; NONEON-NOSVE-NEXT: sub w13, w13, #1
+; NONEON-NOSVE-NEXT: ldp w6, w4, [sp, #16]
+; NONEON-NOSVE-NEXT: ldp w10, w17, [sp, #8]
+; NONEON-NOSVE-NEXT: neg w1, w16
+; NONEON-NOSVE-NEXT: neg w0, w12
+; NONEON-NOSVE-NEXT: sub w16, w16, #1
+; NONEON-NOSVE-NEXT: and w1, w1, w6
+; NONEON-NOSVE-NEXT: and w0, w0, w4
+; NONEON-NOSVE-NEXT: sub w12, w12, #1
+; NONEON-NOSVE-NEXT: ldp w5, w6, [sp, #24]
+; NONEON-NOSVE-NEXT: neg w18, w17
+; NONEON-NOSVE-NEXT: neg w4, w10
+; NONEON-NOSVE-NEXT: sub w17, w17, #1
+; NONEON-NOSVE-NEXT: sub w10, w10, #1
+; NONEON-NOSVE-NEXT: sub w14, w14, #1
+; NONEON-NOSVE-NEXT: sub w8, w8, #1
+; NONEON-NOSVE-NEXT: and w4, w4, w5
+; NONEON-NOSVE-NEXT: and w18, w18, w6
+; NONEON-NOSVE-NEXT: ldp w5, w6, [sp, #32]
+; NONEON-NOSVE-NEXT: and w16, w16, w5
+; NONEON-NOSVE-NEXT: and w12, w12, w6
+; NONEON-NOSVE-NEXT: ldp w5, w6, [sp, #40]
+; NONEON-NOSVE-NEXT: and w10, w10, w5
+; NONEON-NOSVE-NEXT: and w17, w17, w6
+; NONEON-NOSVE-NEXT: orr w17, w17, w18
+; NONEON-NOSVE-NEXT: orr w10, w10, w4
+; NONEON-NOSVE-NEXT: ldp w18, w4, [sp, #88]
+; NONEON-NOSVE-NEXT: ldp w5, w6, [sp, #80]
+; NONEON-NOSVE-NEXT: stp w10, w17, [sp, #104]
+; NONEON-NOSVE-NEXT: orr w10, w12, w0
+; NONEON-NOSVE-NEXT: orr w12, w16, w1
+; NONEON-NOSVE-NEXT: and w11, w11, w4
+; NONEON-NOSVE-NEXT: stp w12, w10, [sp, #96]
+; NONEON-NOSVE-NEXT: and w10, w13, w18
+; NONEON-NOSVE-NEXT: orr w11, w11, w3
+; NONEON-NOSVE-NEXT: and w12, w14, w6
+; NONEON-NOSVE-NEXT: orr w10, w10, w2
+; NONEON-NOSVE-NEXT: and w8, w8, w5
+; NONEON-NOSVE-NEXT: stp w10, w11, [sp, #120]
+; NONEON-NOSVE-NEXT: orr w10, w12, w15
+; NONEON-NOSVE-NEXT: orr w8, w8, w9
+; NONEON-NOSVE-NEXT: stp w8, w10, [sp, #112]
+; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96]
+; NONEON-NOSVE-NEXT: add sp, sp, #128
; NONEON-NOSVE-NEXT: ret
%pre_cond = load <8 x i32>, ptr %pre_cond_ptr
%left = load <8 x i32>, ptr %left_ptr