diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2024-11-16 12:40:42 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-11-16 12:40:42 +0000 |
commit | 51809e4a26a8c6db6cce115822d185fe662dc0fc (patch) | |
tree | 3deadac8b74f1538623f4e2d69281a4a58a9b170 | |
parent | e508bacce45d4fb2ba07d02c55391b858000c3b3 (diff) | |
download | llvm-51809e4a26a8c6db6cce115822d185fe662dc0fc.zip llvm-51809e4a26a8c6db6cce115822d185fe662dc0fc.tar.gz llvm-51809e4a26a8c6db6cce115822d185fe662dc0fc.tar.bz2 |
[DAG] SimplifyDemandedVectorElts - add SimplifyMultipleUse handling to SEXT/ZEXT/TRUNC nodes (#116227)
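Allows us to bypass multiple uses of a SEXT/ZEXT/TRUNC node operand: when not all vector elements are demanded, the node is rebuilt on the replacement operand returned by SimplifyMultipleUseDemandedVectorElts, if there is one.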
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 5 | ||||
-rw-r--r-- | llvm/test/CodeGen/AArch64/arm64-ld1.ll | 22 |
2 files changed, 20 insertions, 7 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 80cbc4a..8fbab33 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -3724,6 +3724,11 @@ bool TargetLowering::SimplifyDemandedVectorElts( KnownZero, TLO, Depth + 1)) return true; + if (!DemandedElts.isAllOnes()) + if (SDValue NewOp = SimplifyMultipleUseDemandedVectorElts( + Op.getOperand(0), DemandedElts, TLO.DAG, Depth + 1)) + return TLO.CombineTo(Op, TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp)); + if (Op.getOpcode() == ISD::ZERO_EXTEND) { // zext(undef) upper bits are guaranteed to be zero. if (DemandedElts.isSubsetOf(KnownUndef)) diff --git a/llvm/test/CodeGen/AArch64/arm64-ld1.ll b/llvm/test/CodeGen/AArch64/arm64-ld1.ll index d3a8f59..eaa5454 100644 --- a/llvm/test/CodeGen/AArch64/arm64-ld1.ll +++ b/llvm/test/CodeGen/AArch64/arm64-ld1.ll @@ -1148,13 +1148,21 @@ define <2 x float> @ld1_2s_float(<2 x float> %V, ptr %bar) { ; Add rdar://13098923 test case: vld1_dup_u32 doesn't generate ld1r.2s define void @ld1r_2s_from_dup(ptr nocapture %a, ptr nocapture %b, ptr nocapture %diff) nounwind ssp { -; CHECK-LABEL: ld1r_2s_from_dup: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ld1r.2s { v0 }, [x0] -; CHECK-NEXT: ld1r.2s { v1 }, [x1] -; CHECK-NEXT: usubl.8h v0, v0, v1 -; CHECK-NEXT: str d0, [x2] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ld1r_2s_from_dup: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: ldr s0, [x0] +; CHECK-SD-NEXT: ldr s1, [x1] +; CHECK-SD-NEXT: usubl.8h v0, v0, v1 +; CHECK-SD-NEXT: str d0, [x2] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ld1r_2s_from_dup: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: ld1r.2s { v0 }, [x0] +; CHECK-GI-NEXT: ld1r.2s { v1 }, [x1] +; CHECK-GI-NEXT: usubl.8h v0, v0, v1 +; CHECK-GI-NEXT: str d0, [x2] +; CHECK-GI-NEXT: ret entry: %tmp1 = load i32, ptr %a, align 4 %tmp2 = insertelement <2 x i32> undef, i32 %tmp1, i32 0 |