2 files changed, 73 insertions, 1 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 777bbf0..b05649c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4780,6 +4780,13 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
         (VTBits - SignBitsOp0 + 1) + (VTBits - SignBitsOp1 + 1);
     return OutValidBits > VTBits ? 1 : VTBits - OutValidBits + 1;
   }
+  case ISD::AVGCEILS:
+  case ISD::AVGFLOORS:
+    Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
+    if (Tmp == 1)
+      return 1; // Early out: the min below cannot be larger than Tmp.
+    Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
+    return std::min(Tmp, Tmp2); // A signed average lies between its operands.
   case ISD::SREM:
     // The sign bit is the LHS's sign bit, except when the result of the
     // remainder is zero. The magnitude of the result should be less than or
diff --git a/llvm/test/CodeGen/AArch64/hadd-combine.ll b/llvm/test/CodeGen/AArch64/hadd-combine.ll
index c0f7678..28f4547 100644
--- a/llvm/test/CodeGen/AArch64/hadd-combine.ll
+++ b/llvm/test/CodeGen/AArch64/hadd-combine.ll
@@ -955,6 +955,71 @@ define <8 x i16> @urhadd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) {
   ret <8 x i16> %r0
 }
 
+; Remove unnecessary sign_extend_inreg after shadd: both inputs have at least 18 sign bits, so sign-extending the result from i15 (shl/ashr by 17) is a no-op.
+define <2 x i32> @shadd_signbits_v2i32(<2 x i32> %a0, <2 x i32> %a1, ptr %p2) {
+; CHECK-LABEL: shadd_signbits_v2i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sshr v0.2s, v0.2s, #17
+; CHECK-NEXT:    sshr v1.2s, v1.2s, #17
+; CHECK-NEXT:    shadd v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    str d0, [x0]
+; CHECK-NEXT:    ret
+  %x0 = ashr <2 x i32> %a0, <i32 17, i32 17> ; at least 18 sign bits
+  %x1 = ashr <2 x i32> %a1, <i32 17, i32 17> ; at least 18 sign bits
+  %m = and <2 x i32> %x0, %x1
+  %s = xor <2 x i32> %x0, %x1
+  %x = ashr <2 x i32> %s, <i32 1, i32 1>
+  %avg = add <2 x i32> %m, %x ; (x0 & x1) + ((x0 ^ x1) >> 1), selected as shadd
+  %avg1 = shl <2 x i32> %avg, <i32 17, i32 17>
+  %avg2 = ashr <2 x i32> %avg1, <i32 17, i32 17> ; shl+ashr pair = sign-extend from i15
+  store <2 x i32> %avg, ptr %p2 ; extra use
+  ret <2 x i32> %avg2
+}
+
+; Remove unnecessary sign_extend_inreg after srhadd: both inputs have at least 18 sign bits, so sign-extending the result from i15 (shl/ashr by 17) is a no-op.
+define <2 x i32> @srhadd_signbits_v2i32(<2 x i32> %a0, <2 x i32> %a1, ptr %p2) {
+; CHECK-LABEL: srhadd_signbits_v2i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sshr v0.2s, v0.2s, #17
+; CHECK-NEXT:    sshr v1.2s, v1.2s, #17
+; CHECK-NEXT:    srhadd v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    str d0, [x0]
+; CHECK-NEXT:    ret
+  %x0 = ashr <2 x i32> %a0, <i32 17, i32 17> ; at least 18 sign bits
+  %x1 = ashr <2 x i32> %a1, <i32 17, i32 17> ; at least 18 sign bits
+  %m = or <2 x i32> %x0, %x1
+  %s = xor <2 x i32> %x0, %x1
+  %x = ashr <2 x i32> %s, <i32 1, i32 1>
+  %avg = sub <2 x i32> %m, %x ; (x0 | x1) - ((x0 ^ x1) >> 1), selected as srhadd
+  %avg1 = shl <2 x i32> %avg, <i32 17, i32 17>
+  %avg2 = ashr <2 x i32> %avg1, <i32 17, i32 17> ; shl+ashr pair = sign-extend from i15
+  store <2 x i32> %avg, ptr %p2 ; extra use
+  ret <2 x i32> %avg2
+}
+
+; Negative test - only 18 sign bits are known after srhadd, too few to remove a sign_extend_inreg from i10 (shl/ashr by 22 needs 23).
+define <2 x i32> @srhadd_signbits_v2i32_negative(<2 x i32> %a0, <2 x i32> %a1, ptr %p2) {
+; CHECK-LABEL: srhadd_signbits_v2i32_negative:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sshr v0.2s, v0.2s, #17
+; CHECK-NEXT:    sshr v1.2s, v1.2s, #17
+; CHECK-NEXT:    srhadd v1.2s, v0.2s, v1.2s
+; CHECK-NEXT:    shl v0.2s, v1.2s, #22
+; CHECK-NEXT:    str d1, [x0]
+; CHECK-NEXT:    sshr v0.2s, v0.2s, #22
+; CHECK-NEXT:    ret
+  %x0 = ashr <2 x i32> %a0, <i32 17, i32 17> ; at least 18 sign bits
+  %x1 = ashr <2 x i32> %a1, <i32 17, i32 17> ; at least 18 sign bits
+  %m = or <2 x i32> %x0, %x1
+  %s = xor <2 x i32> %x0, %x1
+  %x = ashr <2 x i32> %s, <i32 1, i32 1>
+  %avg = sub <2 x i32> %m, %x ; (x0 | x1) - ((x0 ^ x1) >> 1), selected as srhadd
+  %avg1 = shl <2 x i32> %avg, <i32 22, i32 22>
+  %avg2 = ashr <2 x i32> %avg1, <i32 22, i32 22> ; shl+ashr pair = sign-extend from i10; must be kept
+  store <2 x i32> %avg, ptr %p2 ; extra use
+  ret <2 x i32> %avg2
+}
+
 declare <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8>, <8 x i8>)
 declare <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16>, <4 x i16>)
 declare <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32>, <2 x i32>)
@@ -979,4 +1044,4 @@ declare <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16>, <8 x i16>)
 declare <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32>, <4 x i32>)
 declare <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8>, <16 x i8>)
 declare <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16>, <8 x i16>)
-declare <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32>, <4 x i32>)
-\ No newline at end of file
+declare <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32>, <4 x i32>)