author     Sanjay Patel <spatel@rotateright.com>    2022-10-29 10:53:23 -0400
committer  Sanjay Patel <spatel@rotateright.com>    2022-10-29 12:50:19 -0400
commit     6064e92b0a84b6d52b25ab1e6da90e74c76c6eb0
tree       b4560028c37b42dd82f3ce570105369b596c4742
parent     50000ec2cb16a2f7c157164858ae10074064c1ee
[InstCombine] fold mul with incremented "shl 1" factor
X * ((1 << Z) + 1) --> (X << Z) + X
https://alive2.llvm.org/ce/z/P-7WK9
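
For illustration, taking the updated shl1_increment test from the diff
below, the fold rewrites

    %pow2x = shl i5 1, %x
    %x1 = add i5 %pow2x, 1
    %m = mul i5 %x1, %y

into

    %y.fr = freeze i5 %y
    %mulshl = shl i5 %y.fr, %x
    %m1 = add i5 %mulshl, %y.fr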
It's possible that we could do better with propagating
no-wrap, but this carries over the existing logic and
appears to be correct.
The naming differences on the existing folds are a result
of using getName() to name the final value created via
Builder. That makes it easier to transfer no-wrap flags
than the gymnastics required by the raw instruction-creation APIs.
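
Concretely, for the pre-existing (1 << Z) fold, the raw instruction API
in the old code sets each flag after creating the instruction:

    BinaryOperator *Shl = BinaryOperator::CreateShl(X, Z);
    Shl->setHasNoUnsignedWrap(HasNUW);
    Shl->setHasNoSignedWrap(HasNSW && cast<ShlOperator>(Y)->hasNoSignedWrap());

while the Builder call in the new code takes the name and both wrap
flags in one step:

    return Builder.CreateShl(X, Z, Mul.getName(), HasNUW, PropagateNSW);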
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp  | 37
-rw-r--r--  llvm/test/Transforms/InstCombine/mul.ll                   | 67
2 files changed, 61 insertions(+), 43 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 2388538..c3ad993 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -142,14 +142,33 @@ static Value *foldMulSelectToNegate(BinaryOperator &I,
 
 /// Reduce integer multiplication patterns that contain a (1 << Z) factor.
 /// Callers are expected to call this twice to handle commuted patterns.
-static Instruction *foldMulShl1(Value *X, Value *Y, bool HasNSW, bool HasNUW) {
+static Value *foldMulShl1(BinaryOperator &Mul, bool CommuteOperands,
+                          InstCombiner::BuilderTy &Builder) {
+  Value *X = Mul.getOperand(0), *Y = Mul.getOperand(1);
+  if (CommuteOperands)
+    std::swap(X, Y);
+
+  const bool HasNSW = Mul.hasNoSignedWrap();
+  const bool HasNUW = Mul.hasNoUnsignedWrap();
+
   // X * (1 << Z) --> X << Z
   Value *Z;
   if (match(Y, m_Shl(m_One(), m_Value(Z)))) {
-    BinaryOperator *Shl = BinaryOperator::CreateShl(X, Z);
-    Shl->setHasNoUnsignedWrap(HasNUW);
-    Shl->setHasNoSignedWrap(HasNSW && cast<ShlOperator>(Y)->hasNoSignedWrap());
-    return Shl;
+    bool PropagateNSW = HasNSW && cast<ShlOperator>(Y)->hasNoSignedWrap();
+    return Builder.CreateShl(X, Z, Mul.getName(), HasNUW, PropagateNSW);
+  }
+
+  // Similar to above, but an increment of the shifted value becomes an add:
+  // X * ((1 << Z) + 1) --> (X * (1 << Z)) + X --> (X << Z) + X
+  // This increases uses of X, so it may require a freeze, but that is still
+  // expected to be an improvement because it removes the multiply.
+  BinaryOperator *Shift;
+  if (match(Y, m_OneUse(m_Add(m_BinOp(Shift), m_One()))) &&
+      match(Shift, m_OneUse(m_Shl(m_One(), m_Value(Z))))) {
+    bool PropagateNSW = HasNSW && Shift->hasNoSignedWrap();
+    Value *FrX = Builder.CreateFreeze(X, X->getName() + ".fr");
+    Value *Shl = Builder.CreateShl(FrX, Z, "mulshl", HasNUW, PropagateNSW);
+    return Builder.CreateAdd(Shl, FrX, Mul.getName(), HasNUW, PropagateNSW);
   }
 
   return nullptr;
@@ -357,10 +376,10 @@ Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) {
       match(Op1, m_And(m_Value(), m_One()))))
     return BinaryOperator::CreateAnd(Op0, Op1);
 
-  if (Instruction *R = foldMulShl1(Op0, Op1, HasNSW, HasNUW))
-    return R;
-  if (Instruction *R = foldMulShl1(Op1, Op0, HasNSW, HasNUW))
-    return R;
+  if (Value *R = foldMulShl1(I, /* CommuteOperands */ false, Builder))
+    return replaceInstUsesWith(I, R);
+  if (Value *R = foldMulShl1(I, /* CommuteOperands */ true, Builder))
+    return replaceInstUsesWith(I, R);
 
   // (zext bool X) * (zext bool Y) --> zext (and X, Y)
   // (sext bool X) * (sext bool Y) --> zext (and X, Y)
diff --git a/llvm/test/Transforms/InstCombine/mul.ll b/llvm/test/Transforms/InstCombine/mul.ll
index acc01c2..8a3e793 100644
--- a/llvm/test/Transforms/InstCombine/mul.ll
+++ b/llvm/test/Transforms/InstCombine/mul.ll
@@ -87,8 +87,8 @@ define i32 @test12(i32 %a, i32 %b) {
 ; rdar://7293527
 define i32 @shl1(i32 %a, i32 %b) {
 ; CHECK-LABEL: @shl1(
-; CHECK-NEXT:    [[M:%.*]] = shl i32 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    ret i32 [[M]]
+; CHECK-NEXT:    [[M1:%.*]] = shl i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i32 [[M1]]
 ;
   %shl = shl i32 1, %b
   %m = mul i32 %shl, %a
@@ -97,8 +97,8 @@ define i32 @shl1(i32 %a, i32 %b) {
 
 define i32 @shl1_nsw_nsw(i32 %A, i32 %B) {
 ; CHECK-LABEL: @shl1_nsw_nsw(
-; CHECK-NEXT:    [[D:%.*]] = shl nsw i32 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    ret i32 [[D]]
+; CHECK-NEXT:    [[D1:%.*]] = shl nsw i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i32 [[D1]]
 ;
   %shl = shl nsw i32 1, %B
   %D = mul nsw i32 %A, %shl
@@ -107,8 +107,8 @@ define i32 @shl1_nsw_nsw(i32 %A, i32 %B) {
 
 define <2 x i32> @shl1_nsw_nsw_commute(<2 x i32> %A, <2 x i32> %B) {
 ; CHECK-LABEL: @shl1_nsw_nsw_commute(
-; CHECK-NEXT:    [[D:%.*]] = shl nsw <2 x i32> [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    ret <2 x i32> [[D]]
+; CHECK-NEXT:    [[D1:%.*]] = shl nsw <2 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret <2 x i32> [[D1]]
 ;
   %shl = shl nsw <2 x i32> <i32 1, i32 poison>, %B
   %D = mul nsw <2 x i32> %shl, %A
@@ -117,8 +117,8 @@ define <2 x i32> @shl1_nsw_nsw_commute(<2 x i32> %A, <2 x i32> %B) {
 
 define i32 @shl1_nuw(i32 %A, i32 %B) {
 ; CHECK-LABEL: @shl1_nuw(
-; CHECK-NEXT:    [[D:%.*]] = shl nuw i32 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    ret i32 [[D]]
+; CHECK-NEXT:    [[D1:%.*]] = shl nuw i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i32 [[D1]]
 ;
   %shl = shl i32 1, %B
   %D = mul nuw i32 %A, %shl
@@ -127,8 +127,8 @@ define i32 @shl1_nuw(i32 %A, i32 %B) {
 
 define i32 @shl1_nuw_commute(i32 %A, i32 %B) {
 ; CHECK-LABEL: @shl1_nuw_commute(
-; CHECK-NEXT:    [[D:%.*]] = shl i32 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    ret i32 [[D]]
+; CHECK-NEXT:    [[D1:%.*]] = shl i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i32 [[D1]]
 ;
   %shl = shl nuw i32 1, %B
   %D = mul i32 %shl, %A
@@ -138,8 +138,8 @@ define i32 @shl1_nuw_commute(i32 %A, i32 %B) {
 define i32 @shl1_nsw(i32 %A) {
 ; CHECK-LABEL: @shl1_nsw(
 ; CHECK-NEXT:    [[SHL:%.*]] = shl i32 1, [[A:%.*]]
-; CHECK-NEXT:    [[C:%.*]] = shl i32 [[SHL]], [[A]]
-; CHECK-NEXT:    ret i32 [[C]]
+; CHECK-NEXT:    [[C1:%.*]] = shl i32 [[SHL]], [[A]]
+; CHECK-NEXT:    ret i32 [[C1]]
 ;
   %shl = shl i32 1, %A
   %C = mul nsw i32 %shl, %shl
@@ -148,10 +148,10 @@ define i32 @shl1_nsw(i32 %A) {
 
 define i5 @shl1_increment(i5 %x, i5 %y) {
 ; CHECK-LABEL: @shl1_increment(
-; CHECK-NEXT:    [[POW2X:%.*]] = shl i5 1, [[X:%.*]]
-; CHECK-NEXT:    [[X1:%.*]] = add i5 [[POW2X]], 1
-; CHECK-NEXT:    [[M:%.*]] = mul i5 [[X1]], [[Y:%.*]]
-; CHECK-NEXT:    ret i5 [[M]]
+; CHECK-NEXT:    [[Y_FR:%.*]] = freeze i5 [[Y:%.*]]
+; CHECK-NEXT:    [[MULSHL:%.*]] = shl i5 [[Y_FR]], [[X:%.*]]
+; CHECK-NEXT:    [[M1:%.*]] = add i5 [[MULSHL]], [[Y_FR]]
+; CHECK-NEXT:    ret i5 [[M1]]
 ;
   %pow2x = shl i5 1, %x
   %x1 = add i5 %pow2x, 1
@@ -162,10 +162,9 @@ define i5 @shl1_increment(i5 %x, i5 %y) {
 define <3 x i5> @shl1_nuw_increment_commute(<3 x i5> %x, <3 x i5> noundef %p) {
 ; CHECK-LABEL: @shl1_nuw_increment_commute(
 ; CHECK-NEXT:    [[Y:%.*]] = ashr <3 x i5> [[P:%.*]], <i5 1, i5 1, i5 1>
-; CHECK-NEXT:    [[POW2X:%.*]] = shl <3 x i5> <i5 1, i5 poison, i5 1>, [[X:%.*]]
-; CHECK-NEXT:    [[X1:%.*]] = add <3 x i5> [[POW2X]], <i5 1, i5 poison, i5 1>
-; CHECK-NEXT:    [[M:%.*]] = mul nuw <3 x i5> [[Y]], [[X1]]
-; CHECK-NEXT:    ret <3 x i5> [[M]]
+; CHECK-NEXT:    [[MULSHL:%.*]] = shl nuw <3 x i5> [[Y]], [[X:%.*]]
+; CHECK-NEXT:    [[M1:%.*]] = add nuw <3 x i5> [[MULSHL]], [[Y]]
+; CHECK-NEXT:    ret <3 x i5> [[M1]]
 ;
   %y = ashr <3 x i5> %p, <i5 1, i5 1, i5 1> ; thwart complexity-based canonicalization
   %pow2x = shl <3 x i5> <i5 1, i5 poison, i5 1>, %x
@@ -176,10 +175,10 @@ define <3 x i5> @shl1_nuw_increment_commute(<3 x i5> %x, <3 x i5> noundef %p) {
 
 define i5 @shl1_nsw_increment(i5 %x, i5 %y) {
 ; CHECK-LABEL: @shl1_nsw_increment(
-; CHECK-NEXT:    [[POW2X:%.*]] = shl i5 1, [[X:%.*]]
-; CHECK-NEXT:    [[X1:%.*]] = add i5 [[POW2X]], 1
-; CHECK-NEXT:    [[M:%.*]] = mul nsw i5 [[X1]], [[Y:%.*]]
-; CHECK-NEXT:    ret i5 [[M]]
+; CHECK-NEXT:    [[Y_FR:%.*]] = freeze i5 [[Y:%.*]]
+; CHECK-NEXT:    [[MULSHL:%.*]] = shl i5 [[Y_FR]], [[X:%.*]]
+; CHECK-NEXT:    [[M1:%.*]] = add i5 [[MULSHL]], [[Y_FR]]
+; CHECK-NEXT:    ret i5 [[M1]]
 ;
   %pow2x = shl i5 1, %x
   %x1 = add i5 %pow2x, 1
@@ -189,10 +188,10 @@ define i5 @shl1_nsw_increment(i5 %x, i5 %y) {
 
 define i5 @shl1_nsw_nsw_increment(i5 %x, i5 %y) {
 ; CHECK-LABEL: @shl1_nsw_nsw_increment(
-; CHECK-NEXT:    [[POW2X:%.*]] = shl nsw i5 1, [[X:%.*]]
-; CHECK-NEXT:    [[X1:%.*]] = add nuw nsw i5 [[POW2X]], 1
-; CHECK-NEXT:    [[M:%.*]] = mul nsw i5 [[X1]], [[Y:%.*]]
-; CHECK-NEXT:    ret i5 [[M]]
+; CHECK-NEXT:    [[Y_FR:%.*]] = freeze i5 [[Y:%.*]]
+; CHECK-NEXT:    [[MULSHL:%.*]] = shl nsw i5 [[Y_FR]], [[X:%.*]]
+; CHECK-NEXT:    [[M1:%.*]] = add nsw i5 [[MULSHL]], [[Y_FR]]
+; CHECK-NEXT:    ret i5 [[M1]]
 ;
   %pow2x = shl nsw i5 1, %x
   %x1 = add i5 %pow2x, 1
@@ -202,10 +201,10 @@ define i5 @shl1_nsw_nsw_increment(i5 %x, i5 %y) {
 
 define i5 @shl1_nsw_nsw_increment_commute(i5 %x, i5 %y) {
 ; CHECK-LABEL: @shl1_nsw_nsw_increment_commute(
-; CHECK-NEXT:    [[POW2X:%.*]] = shl nsw i5 1, [[X:%.*]]
-; CHECK-NEXT:    [[X1:%.*]] = add nuw nsw i5 [[POW2X]], 1
-; CHECK-NEXT:    [[M:%.*]] = mul i5 [[X1]], [[Y:%.*]]
-; CHECK-NEXT:    ret i5 [[M]]
+; CHECK-NEXT:    [[Y_FR:%.*]] = freeze i5 [[Y:%.*]]
+; CHECK-NEXT:    [[MULSHL:%.*]] = shl i5 [[Y_FR]], [[X:%.*]]
+; CHECK-NEXT:    [[M1:%.*]] = add i5 [[MULSHL]], [[Y_FR]]
+; CHECK-NEXT:    ret i5 [[M1]]
 ;
   %pow2x = shl nsw i5 1, %x
   %x1 = add nsw i5 %pow2x, 1
@@ -1089,8 +1088,8 @@ define i64 @test30(i32 %A, i32 %B) {
 @PR22087 = external global i32
 define i32 @test31(i32 %V) {
 ; CHECK-LABEL: @test31(
-; CHECK-NEXT:    [[MUL:%.*]] = shl i32 [[V:%.*]], zext (i1 icmp ne (ptr inttoptr (i64 1 to ptr), ptr @PR22087) to i32)
-; CHECK-NEXT:    ret i32 [[MUL]]
+; CHECK-NEXT:    [[MUL1:%.*]] = shl i32 [[V:%.*]], zext (i1 icmp ne (ptr inttoptr (i64 1 to ptr), ptr @PR22087) to i32)
+; CHECK-NEXT:    ret i32 [[MUL1]]
 ;
   %mul = mul i32 %V, shl (i32 1, i32 zext (i1 icmp ne (ptr inttoptr (i64 1 to ptr), ptr @PR22087) to i32))
   ret i32 %mul
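
A minimal sketch of why the new fold freezes the repeated operand
(hypothetical IR, not from this commit): an undef value may resolve to
a different value at each use, so rewriting the single-use

    %m = mul i5 %x1, undef

into two uses without a freeze

    %shl = shl i5 undef, %x    ; this undef may resolve one way...
    %m = add i5 %shl, undef    ; ...and this one another

would not be sound. CreateFreeze pins the operand to one arbitrary but
consistent value before it is used twice.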