diff options
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 7 |
| -rw-r--r-- | llvm/test/CodeGen/AArch64/hadd-combine.ll | 67 |
2 files changed, 73 insertions, 1 deletion
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 777bbf0..b05649c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -4780,6 +4780,13 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, (VTBits - SignBitsOp0 + 1) + (VTBits - SignBitsOp1 + 1); return OutValidBits > VTBits ? 1 : VTBits - OutValidBits + 1; } + case ISD::AVGCEILS: + case ISD::AVGFLOORS: + Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); + if (Tmp == 1) + return 1; // Early out. + Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1); + return std::min(Tmp, Tmp2); case ISD::SREM: // The sign bit is the LHS's sign bit, except when the result of the // remainder is zero. The magnitude of the result should be less than or diff --git a/llvm/test/CodeGen/AArch64/hadd-combine.ll b/llvm/test/CodeGen/AArch64/hadd-combine.ll index c0f7678..28f4547 100644 --- a/llvm/test/CodeGen/AArch64/hadd-combine.ll +++ b/llvm/test/CodeGen/AArch64/hadd-combine.ll @@ -955,6 +955,71 @@ define <8 x i16> @urhadd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) { ret <8 x i16> %r0 } +; Remove unnecessary sign_extend_inreg after shadd +define <2 x i32> @shadd_signbits_v2i32(<2 x i32> %a0, <2 x i32> %a1, ptr %p2) { +; CHECK-LABEL: shadd_signbits_v2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: sshr v0.2s, v0.2s, #17 +; CHECK-NEXT: sshr v1.2s, v1.2s, #17 +; CHECK-NEXT: shadd v0.2s, v0.2s, v1.2s +; CHECK-NEXT: str d0, [x0] +; CHECK-NEXT: ret + %x0 = ashr <2 x i32> %a0, <i32 17, i32 17> + %x1 = ashr <2 x i32> %a1, <i32 17, i32 17> + %m = and <2 x i32> %x0, %x1 + %s = xor <2 x i32> %x0, %x1 + %x = ashr <2 x i32> %s, <i32 1, i32 1> + %avg = add <2 x i32> %m, %x + %avg1 = shl <2 x i32> %avg, <i32 17, i32 17> + %avg2 = ashr <2 x i32> %avg1, <i32 17, i32 17> + store <2 x i32> %avg, ptr %p2 ; extra use + ret <2 x i32> %avg2 +} + +; Remove unnecessary 
sign_extend_inreg after srhadd +define <2 x i32> @srhadd_signbits_v2i32(<2 x i32> %a0, <2 x i32> %a1, ptr %p2) { +; CHECK-LABEL: srhadd_signbits_v2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: sshr v0.2s, v0.2s, #17 +; CHECK-NEXT: sshr v1.2s, v1.2s, #17 +; CHECK-NEXT: srhadd v0.2s, v0.2s, v1.2s +; CHECK-NEXT: str d0, [x0] +; CHECK-NEXT: ret + %x0 = ashr <2 x i32> %a0, <i32 17, i32 17> + %x1 = ashr <2 x i32> %a1, <i32 17, i32 17> + %m = or <2 x i32> %x0, %x1 + %s = xor <2 x i32> %x0, %x1 + %x = ashr <2 x i32> %s, <i32 1, i32 1> + %avg = sub <2 x i32> %m, %x + %avg1 = shl <2 x i32> %avg, <i32 17, i32 17> + %avg2 = ashr <2 x i32> %avg1, <i32 17, i32 17> + store <2 x i32> %avg, ptr %p2 ; extra use + ret <2 x i32> %avg2 +} + +; negative test - not enough signbits to remove sign_extend_inreg after srhadd +define <2 x i32> @srhadd_signbits_v2i32_negative(<2 x i32> %a0, <2 x i32> %a1, ptr %p2) { +; CHECK-LABEL: srhadd_signbits_v2i32_negative: +; CHECK: // %bb.0: +; CHECK-NEXT: sshr v0.2s, v0.2s, #17 +; CHECK-NEXT: sshr v1.2s, v1.2s, #17 +; CHECK-NEXT: srhadd v1.2s, v0.2s, v1.2s +; CHECK-NEXT: shl v0.2s, v1.2s, #22 +; CHECK-NEXT: str d1, [x0] +; CHECK-NEXT: sshr v0.2s, v0.2s, #22 +; CHECK-NEXT: ret + %x0 = ashr <2 x i32> %a0, <i32 17, i32 17> + %x1 = ashr <2 x i32> %a1, <i32 17, i32 17> + %m = or <2 x i32> %x0, %x1 + %s = xor <2 x i32> %x0, %x1 + %x = ashr <2 x i32> %s, <i32 1, i32 1> + %avg = sub <2 x i32> %m, %x + %avg1 = shl <2 x i32> %avg, <i32 22, i32 22> + %avg2 = ashr <2 x i32> %avg1, <i32 22, i32 22> + store <2 x i32> %avg, ptr %p2 ; extra use + ret <2 x i32> %avg2 +} + declare <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8>, <8 x i8>) declare <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16>, <4 x i16>) declare <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32>, <2 x i32>) @@ -979,4 +1044,4 @@ declare <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16>, <8 x i16>) declare <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32>, <4 x i32>) declare <16 x i8> 
@llvm.aarch64.neon.urhadd.v16i8(<16 x i8>, <16 x i8>) declare <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16>, <8 x i16>) -declare <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32>, <4 x i32>)
\ No newline at end of file +declare <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32>, <4 x i32>) |
