diff options
author | Craig Topper <craig.topper@sifive.com> | 2023-04-24 18:11:05 -0700 |
---|---|---|
committer | Craig Topper <craig.topper@sifive.com> | 2023-04-24 18:17:03 -0700 |
commit | faa2d69e462146543e168cc6c36a28a7e238ecce (patch) | |
tree | 94dcd61c517ec216db3b11c41f7e0a1f7a9c8348 | |
parent | 463412e930b248dab06e0f51d92a8cf0e71072fc (diff) | |
download | llvm-faa2d69e462146543e168cc6c36a28a7e238ecce.zip llvm-faa2d69e462146543e168cc6c36a28a7e238ecce.tar.gz llvm-faa2d69e462146543e168cc6c36a28a7e238ecce.tar.bz2 |
[RISCV] Ensure extract_vector_elt has a single use in combineBinOpToReduce.
Without this check, when the extract_vector_elt has other uses, the
original reduction will not be removed and we'll end up with a second
reduction. Reductions are expensive operations, so we should avoid that.
-rw-r--r-- | llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll | 9 |
2 files changed, 5 insertions, 6 deletions
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 897a8a6..677c4be 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -9120,7 +9120,7 @@ static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG, SDValue Extract = N->getOperand(ReduceIdx); SDValue Reduce = Extract.getOperand(0); - if (!Reduce.hasOneUse()) + if (!Extract.hasOneUse() || !Reduce.hasOneUse()) return SDValue(); SDValue ScalarV = Reduce.getOperand(2); diff --git a/llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll b/llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll index a42f418..ade4247 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll @@ -265,12 +265,11 @@ define float @reduce_fadd3(float %x, <4 x float> %v, ptr %rdxptr) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vfredusum.vs v9, v8, v9 -; CHECK-NEXT: vfmv.s.f v10, fa0 -; CHECK-NEXT: vfredusum.vs v8, v8, v10 -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfredusum.vs v8, v8, v9 +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fadd.s fa0, fa5, fa0 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vse32.v v9, (a0) +; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret entry: %rdx = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.0, <4 x float> %v) |