diff options
author | Noah Goldstein <goldstein.w.n@gmail.com> | 2023-01-27 17:26:19 -0600 |
---|---|---|
committer | Noah Goldstein <goldstein.w.n@gmail.com> | 2023-01-27 20:35:40 -0600 |
commit | baf7f7e5752ceba898e1a434c4512de9d35863b0 (patch) | |
tree | ce433d8c7e21aa99a2e48802b3423130ac6a41ac | |
parent | 09b7692c62599f36093857493a96be782a48b8ee (diff) | |
download | llvm-baf7f7e5752ceba898e1a434c4512de9d35863b0.zip llvm-baf7f7e5752ceba898e1a434c4512de9d35863b0.tar.gz llvm-baf7f7e5752ceba898e1a434c4512de9d35863b0.tar.bz2 |
Recommit "Add optimizations for icmp eq/ne (mul(X, Y), 0)" 2nd Try
First time caused build failure:
https://lab.llvm.org/buildbot/#/builders/183/builds/10447
but after investigating it seems to be unrelated. The same
test/build passed later with the original commit here:
https://lab.llvm.org/buildbot/#/builders/183/builds/10448
1. Add checks if X and/or Y are odd. The Odd values are unnecessary to
the icmp: isZero(Odd * N) == isZero(N)
2. If neither X nor Y is known odd, then if X * Y cannot overflow AND
if X and/or Y is non-zero, the non-zero values are unnecessary to the
icmp.
Reviewed By: nikic
Differential Revision: https://reviews.llvm.org/D140850
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp | 42 | ||||
-rw-r--r-- | llvm/test/Transforms/InstCombine/icmp-binop.ll | 27 | ||||
-rw-r--r-- | llvm/test/Transforms/InstCombine/pr38677.ll | 4 |
3 files changed, 52 insertions, 21 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 8e90014..41f1fe9 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -1295,6 +1295,48 @@ Instruction *InstCombinerImpl::foldICmpWithZero(ICmpInst &Cmp) { return new ICmpInst(Pred, X, Cmp.getOperand(1)); } + // (icmp eq/ne (mul X Y)) -> (icmp eq/ne X/Y) if we know about whether X/Y are + // odd/non-zero/there is no overflow. + if (match(Cmp.getOperand(0), m_Mul(m_Value(X), m_Value(Y))) && + ICmpInst::isEquality(Pred)) { + + KnownBits XKnown = computeKnownBits(X, 0, &Cmp); + // if X % 2 != 0 + // (icmp eq/ne Y) + if (XKnown.countMaxTrailingZeros() == 0) + return new ICmpInst(Pred, Y, Cmp.getOperand(1)); + + KnownBits YKnown = computeKnownBits(Y, 0, &Cmp); + // if Y % 2 != 0 + // (icmp eq/ne X) + if (YKnown.countMaxTrailingZeros() == 0) + return new ICmpInst(Pred, X, Cmp.getOperand(1)); + + auto *BO0 = cast<OverflowingBinaryOperator>(Cmp.getOperand(0)); + if (BO0->hasNoUnsignedWrap() || BO0->hasNoSignedWrap()) { + const SimplifyQuery Q = SQ.getWithInstruction(&Cmp); + // `isKnownNonZero` does more analysis than just `!KnownBits.One.isZero()` + // but to avoid unnecessary work, first just if this is an obvious case. + + // if X non-zero and NoOverflow(X * Y) + // (icmp eq/ne Y) + if (!XKnown.One.isZero() || isKnownNonZero(X, DL, 0, Q.AC, Q.CxtI, Q.DT)) + return new ICmpInst(Pred, Y, Cmp.getOperand(1)); + + // if Y non-zero and NoOverflow(X * Y) + // (icmp eq/ne X) + if (!YKnown.One.isZero() || isKnownNonZero(Y, DL, 0, Q.AC, Q.CxtI, Q.DT)) + return new ICmpInst(Pred, X, Cmp.getOperand(1)); + } + // Note, we are skipping cases: + // if Y % 2 != 0 AND X % 2 != 0 + // (false/true) + // if X non-zero and Y non-zero and NoOverflow(X * Y) + // (false/true) + // Those can be simplified later as we would have already replaced the (icmp + // eq/ne (mul X, Y)) with (icmp eq/ne X/Y) and if X/Y is known non-zero that + // will fold to a constant elsewhere. + } return nullptr; } diff --git a/llvm/test/Transforms/InstCombine/icmp-binop.ll b/llvm/test/Transforms/InstCombine/icmp-binop.ll index 1f4a0de..60a1241 100644 --- a/llvm/test/Transforms/InstCombine/icmp-binop.ll +++ b/llvm/test/Transforms/InstCombine/icmp-binop.ll @@ -6,8 +6,7 @@ declare void @llvm.assume(i1) define i1 @mul_unkV_oddC_eq(i32 %v) { ; CHECK-LABEL: @mul_unkV_oddC_eq( -; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[V:%.*]], 3 -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[MUL]], 0 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V:%.*]], 0 ; CHECK-NEXT: ret i1 [[CMP]] ; %mul = mul i32 %v, 3 @@ -28,8 +27,7 @@ define i1 @mul_unkV_oddC_eq_nonzero(i32 %v) { define <2 x i1> @mul_unkV_oddC_ne_vec(<2 x i64> %v) { ; CHECK-LABEL: @mul_unkV_oddC_ne_vec( -; CHECK-NEXT: [[MUL:%.*]] = mul <2 x i64> [[V:%.*]], <i64 3, i64 3> -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i64> [[MUL]], zeroinitializer +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i64> [[V:%.*]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %mul = mul <2 x i64> %v, <i64 3, i64 3> @@ -72,7 +70,7 @@ define i1 @mul_unkV_oddC_sge(i8 %v) { define i1 @mul_reused_unkV_oddC_ne(i64 %v) { ; CHECK-LABEL: @mul_reused_unkV_oddC_ne( ; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[V:%.*]], 3 -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[MUL]], 0 +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[V]], 0 ; CHECK-NEXT: call void @use64(i64 [[MUL]]) ; CHECK-NEXT: ret i1 [[CMP]] ; @@ -87,8 +85,7 @@ define i1 @mul_assumeoddV_unkV_eq(i16 %v, i16 %v2) { ; CHECK-NEXT: [[LB:%.*]] = and i16 [[V2:%.*]], 1 ; CHECK-NEXT: [[ODD:%.*]] = icmp ne i16 [[LB]], 0 ; CHECK-NEXT: call void @llvm.assume(i1 [[ODD]]) -; CHECK-NEXT: [[MUL:%.*]] = mul i16 [[V:%.*]], [[V2]] -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[MUL]], 0 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[V:%.*]], 0 ; CHECK-NEXT: ret i1 [[CMP]] ; %lb = and i16 %v2, 1 @@ -105,7 +102,7 @@ define i1 @mul_reusedassumeoddV_unkV_ne(i64 %v, i64 %v2) { ; CHECK-NEXT: [[ODD:%.*]] = icmp ne i64 [[LB]], 0 ; CHECK-NEXT: call void @llvm.assume(i1 [[ODD]]) ; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[V]], [[V2:%.*]] -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[MUL]], 0 +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[V2]], 0 ; CHECK-NEXT: call void @use64(i64 [[MUL]]) ; CHECK-NEXT: ret i1 [[CMP]] ; @@ -120,9 +117,7 @@ define i1 @mul_reusedassumeoddV_unkV_ne(i64 %v, i64 %v2) { define <2 x i1> @mul_setoddV_unkV_ne(<2 x i32> %v1, <2 x i32> %v2) { ; CHECK-LABEL: @mul_setoddV_unkV_ne( -; CHECK-NEXT: [[V:%.*]] = or <2 x i32> [[V1:%.*]], <i32 1, i32 1> -; CHECK-NEXT: [[MUL:%.*]] = mul <2 x i32> [[V]], [[V2:%.*]] -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[MUL]], zeroinitializer +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[V2:%.*]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %v = or <2 x i32> %v1, <i32 1, i32 1> @@ -190,8 +185,7 @@ define i1 @mul_assumenzV_unkV_nsw_ne(i32 %v, i32 %v2) { ; CHECK-LABEL: @mul_assumenzV_unkV_nsw_ne( ; CHECK-NEXT: [[NZ:%.*]] = icmp ne i32 [[V:%.*]], 0 ; CHECK-NEXT: call void @llvm.assume(i1 [[NZ]]) -; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[V]], [[V2:%.*]] -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[MUL]], 0 +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[V2:%.*]], 0 ; CHECK-NEXT: ret i1 [[CMP]] ; %nz = icmp ne i32 %v, 0 @@ -229,9 +223,7 @@ define <2 x i1> @mul_unkV_unkV_nsw_nuw_ne(<2 x i16> %v, <2 x i16> %v2) { define i1 @mul_setnzV_unkV_nuw_eq(i8 %v1, i8 %v2) { ; CHECK-LABEL: @mul_setnzV_unkV_nuw_eq( -; CHECK-NEXT: [[V:%.*]] = or i8 [[V1:%.*]], 2 -; CHECK-NEXT: [[MUL:%.*]] = mul nuw i8 [[V]], [[V2:%.*]] -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[MUL]], 0 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[V2:%.*]], 0 ; CHECK-NEXT: ret i1 [[CMP]] ; %v = or i8 %v1, 2 @@ -245,8 +237,7 @@ define i1 @mul_brnzV_unkV_nuw_eq(i64 %v, i64 %v2) { ; CHECK-NEXT: [[NZ_NOT:%.*]] = icmp eq i64 [[V2:%.*]], 0 ; CHECK-NEXT: br i1 [[NZ_NOT]], label [[FALSE:%.*]], label [[TRUE:%.*]] ; CHECK: true: -; CHECK-NEXT: [[MUL:%.*]] = mul nuw i64 [[V:%.*]], [[V2]] -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[MUL]], 0 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[V:%.*]], 0 ; CHECK-NEXT: ret i1 [[CMP]] ; CHECK: false: ; CHECK-NEXT: call void @use64(i64 [[V]]) diff --git a/llvm/test/Transforms/InstCombine/pr38677.ll b/llvm/test/Transforms/InstCombine/pr38677.ll index 63c0cab..bacee2c 100644 --- a/llvm/test/Transforms/InstCombine/pr38677.ll +++ b/llvm/test/Transforms/InstCombine/pr38677.ll @@ -12,9 +12,7 @@ define i32 @foo(i1 %which, ptr %dst) { ; CHECK-NEXT: br label [[FINAL]] ; CHECK: final: ; CHECK-NEXT: [[USE2:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ select (i1 icmp eq (ptr @A, ptr @B), i32 2, i32 1), [[DELAY]] ] -; CHECK-NEXT: [[B7:%.*]] = mul i32 [[USE2]], 2147483647 -; CHECK-NEXT: [[C3:%.*]] = icmp eq i32 [[B7]], 0 -; CHECK-NEXT: store i1 [[C3]], ptr [[DST:%.*]], align 1 +; CHECK-NEXT: store i1 false, ptr [[DST:%.*]], align 1 ; CHECK-NEXT: ret i32 [[USE2]] ; entry: |