diff options
author | hanbeom <kese111@gmail.com> | 2024-04-10 11:19:09 +0900 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-04-10 11:19:09 +0900 |
commit | 44c79da3ae90795ca8b252e8a92910eee9d889c0 (patch) | |
tree | c485127b5d8f7e61a8f9f550e43a49a20104f79d | |
parent | ee52add6cb4a6a4ba4beb941c1f2cfa82266e0df (diff) | |
download | llvm-44c79da3ae90795ca8b252e8a92910eee9d889c0.zip llvm-44c79da3ae90795ca8b252e8a92910eee9d889c0.tar.gz llvm-44c79da3ae90795ca8b252e8a92910eee9d889c0.tar.bz2 |
[InstCombine] Remove shl if we only demand known signbits of shift source (#79014)
This patch resolves the TODO written in commit:
https://github.com/llvm/llvm-project/commit/5909c678831f3a5c1669f6906f777d4ec4532fa1
Proof: https://alive2.llvm.org/ce/z/C3VNoR
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 42 | ||||
-rw-r--r-- | llvm/test/Transforms/InstCombine/shl-demand.ll | 118 |
2 files changed, 143 insertions, 17 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index c691c8b..6739b874 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -655,25 +655,33 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, } } - // TODO: If we only want bits that already match the signbit then we don't + // We only want bits that already match the signbit then we don't // need to shift. + uint64_t ShiftAmt = SA->getLimitedValue(BitWidth - 1); + if (DemandedMask.countr_zero() >= ShiftAmt) { + if (I->hasNoSignedWrap()) { + unsigned NumHiDemandedBits = BitWidth - DemandedMask.countr_zero(); + unsigned SignBits = + ComputeNumSignBits(I->getOperand(0), Depth + 1, CxtI); + if (SignBits > ShiftAmt && SignBits - ShiftAmt >= NumHiDemandedBits) + return I->getOperand(0); + } - // If we can pre-shift a right-shifted constant to the left without - // losing any high bits amd we don't demand the low bits, then eliminate - // the left-shift: - // (C >> X) << LeftShiftAmtC --> (C << RightShiftAmtC) >> X - uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1); - Value *X; - Constant *C; - if (DemandedMask.countr_zero() >= ShiftAmt && - match(I->getOperand(0), m_LShr(m_ImmConstant(C), m_Value(X)))) { - Constant *LeftShiftAmtC = ConstantInt::get(VTy, ShiftAmt); - Constant *NewC = ConstantFoldBinaryOpOperands(Instruction::Shl, C, - LeftShiftAmtC, DL); - if (ConstantFoldBinaryOpOperands(Instruction::LShr, NewC, LeftShiftAmtC, - DL) == C) { - Instruction *Lshr = BinaryOperator::CreateLShr(NewC, X); - return InsertNewInstWith(Lshr, I->getIterator()); + // If we can pre-shift a right-shifted constant to the left without + // losing any high bits and we don't demand the low bits, then eliminate + // the left-shift: + // (C >> X) << LeftShiftAmtC --> (C << LeftShiftAmtC) >> X + Value *X; + Constant *C; 
+ if (match(I->getOperand(0), m_LShr(m_ImmConstant(C), m_Value(X)))) { + Constant *LeftShiftAmtC = ConstantInt::get(VTy, ShiftAmt); + Constant *NewC = ConstantFoldBinaryOpOperands(Instruction::Shl, C, + LeftShiftAmtC, DL); + if (ConstantFoldBinaryOpOperands(Instruction::LShr, NewC, + LeftShiftAmtC, DL) == C) { + Instruction *Lshr = BinaryOperator::CreateLShr(NewC, X); + return InsertNewInstWith(Lshr, I->getIterator()); + } } } diff --git a/llvm/test/Transforms/InstCombine/shl-demand.ll b/llvm/test/Transforms/InstCombine/shl-demand.ll index 8575289..26175eb 100644 --- a/llvm/test/Transforms/InstCombine/shl-demand.ll +++ b/llvm/test/Transforms/InstCombine/shl-demand.ll @@ -1,6 +1,124 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -passes=instcombine -S < %s | FileCheck %s + +; If we only want bits that already match the signbit then we don't need to shift. +; https://alive2.llvm.org/ce/z/WJBPVt +define i32 @src_srem_shl_demand_max_signbit(i32 %a0) { +; CHECK-LABEL: @src_srem_shl_demand_max_signbit( +; CHECK-NEXT: [[SREM:%.*]] = srem i32 [[A0:%.*]], 2 +; CHECK-NEXT: [[MASK:%.*]] = and i32 [[SREM]], -2147483648 +; CHECK-NEXT: ret i32 [[MASK]] +; + %srem = srem i32 %a0, 2 ; srem = SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSD + %shl = shl i32 %srem, 30 ; shl = SD000000000000000000000000000000 + %mask = and i32 %shl, -2147483648 ; mask = 10000000000000000000000000000000 + ret i32 %mask +} + +define i32 @src_srem_shl_demand_min_signbit(i32 %a0) { +; CHECK-LABEL: @src_srem_shl_demand_min_signbit( +; CHECK-NEXT: [[SREM:%.*]] = srem i32 [[A0:%.*]], 1073741823 +; CHECK-NEXT: [[MASK:%.*]] = and i32 [[SREM]], -2147483648 +; CHECK-NEXT: ret i32 [[MASK]] +; + %srem = srem i32 %a0, 1073741823 ; srem = SSDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD + %shl = shl i32 %srem, 1 ; shl = SDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD0 + %mask = and i32 %shl, -2147483648 ; mask = 10000000000000000000000000000000 + ret i32 %mask +} + +define i32 @src_srem_shl_demand_max_mask(i32 %a0) { +; 
CHECK-LABEL: @src_srem_shl_demand_max_mask( +; CHECK-NEXT: [[SREM:%.*]] = srem i32 [[A0:%.*]], 2 +; CHECK-NEXT: [[MASK:%.*]] = and i32 [[SREM]], -4 +; CHECK-NEXT: ret i32 [[MASK]] +; + %srem = srem i32 %a0, 2 ; srem = SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSD + %shl = shl i32 %srem, 1 ; shl = SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSD0 + %mask = and i32 %shl, -4 ; mask = 11111111111111111111111111111100 + ret i32 %mask +} + +; Negative test - mask demands non-signbit from shift source +define i32 @src_srem_shl_demand_max_signbit_mask_hit_first_demand(i32 %a0) { +; CHECK-LABEL: @src_srem_shl_demand_max_signbit_mask_hit_first_demand( +; CHECK-NEXT: [[SREM:%.*]] = srem i32 [[A0:%.*]], 4 +; CHECK-NEXT: [[SHL:%.*]] = shl nsw i32 [[SREM]], 29 +; CHECK-NEXT: [[MASK:%.*]] = and i32 [[SHL]], -1073741824 +; CHECK-NEXT: ret i32 [[MASK]] +; + %srem = srem i32 %a0, 4 ; srem = SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSDD + %shl = shl i32 %srem, 29 ; shl = SDD00000000000000000000000000000 + %mask = and i32 %shl, -1073741824 ; mask = 11000000000000000000000000000000 + ret i32 %mask +} + +define i32 @src_srem_shl_demand_min_signbit_mask_hit_last_demand(i32 %a0) { +; CHECK-LABEL: @src_srem_shl_demand_min_signbit_mask_hit_last_demand( +; CHECK-NEXT: [[SREM:%.*]] = srem i32 [[A0:%.*]], 536870912 +; CHECK-NEXT: [[SHL:%.*]] = shl nsw i32 [[SREM]], 1 +; CHECK-NEXT: [[MASK:%.*]] = and i32 [[SHL]], -1073741822 +; CHECK-NEXT: ret i32 [[MASK]] +; + %srem = srem i32 %a0, 536870912 ; srem = SSSDDDDDDDDDDDDDDDDDDDDDDDDDDDDD + %shl = shl i32 %srem, 1 ; shl = SSDDDDDDDDDDDDDDDDDDDDDDDDDDDDD0 + %mask = and i32 %shl, -1073741822 ; mask = 11000000000000000000000000000010 + ret i32 %mask +} + +define i32 @src_srem_shl_demand_eliminate_signbit(i32 %a0) { +; CHECK-LABEL: @src_srem_shl_demand_eliminate_signbit( +; CHECK-NEXT: [[SREM:%.*]] = srem i32 [[A0:%.*]], 1073741824 +; CHECK-NEXT: [[SHL:%.*]] = shl nsw i32 [[SREM]], 1 +; CHECK-NEXT: [[MASK:%.*]] = and i32 [[SHL]], 2 +; CHECK-NEXT: ret i32 [[MASK]] +; + %srem = srem i32 %a0, 
1073741824 ; srem = SSDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD + %shl = shl i32 %srem, 1 ; shl = DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD0 + %mask = and i32 %shl, 2 ; mask = 00000000000000000000000000000010 + ret i32 %mask +} + +define i32 @src_srem_shl_demand_max_mask_hit_demand(i32 %a0) { +; CHECK-LABEL: @src_srem_shl_demand_max_mask_hit_demand( +; CHECK-NEXT: [[SREM:%.*]] = srem i32 [[A0:%.*]], 4 +; CHECK-NEXT: [[SHL:%.*]] = shl nsw i32 [[SREM]], 1 +; CHECK-NEXT: [[MASK:%.*]] = and i32 [[SHL]], -4 +; CHECK-NEXT: ret i32 [[MASK]] +; + %srem = srem i32 %a0, 4 ; srem = SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSDD + %shl= shl i32 %srem, 1 ; shl = SSSSSSSSSSSSSSSSSSSSSSSSSSSSSDD0 + %mask = and i32 %shl, -4 ; mask = 11111111111111111111111111111100 + ret i32 %mask +} + +define <2 x i32> @src_srem_shl_mask_vector(<2 x i32> %a0) { +; CHECK-LABEL: @src_srem_shl_mask_vector( +; CHECK-NEXT: [[SREM:%.*]] = srem <2 x i32> [[A0:%.*]], <i32 4, i32 4> +; CHECK-NEXT: [[SHL:%.*]] = shl nsw <2 x i32> [[SREM]], <i32 29, i32 29> +; CHECK-NEXT: [[MASK:%.*]] = and <2 x i32> [[SHL]], <i32 -1073741824, i32 -1073741824> +; CHECK-NEXT: ret <2 x i32> [[MASK]] +; + %srem = srem <2 x i32> %a0, <i32 4, i32 4> + %shl = shl <2 x i32> %srem, <i32 29, i32 29> + %mask = and <2 x i32> %shl, <i32 -1073741824, i32 -1073741824> + ret <2 x i32> %mask +} + +define <2 x i32> @src_srem_shl_mask_vector_nonconstant(<2 x i32> %a0, <2 x i32> %a1) { +; CHECK-LABEL: @src_srem_shl_mask_vector_nonconstant( +; CHECK-NEXT: [[SREM:%.*]] = srem <2 x i32> [[A0:%.*]], <i32 4, i32 4> +; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> [[SREM]], [[A1:%.*]] +; CHECK-NEXT: [[MASK:%.*]] = and <2 x i32> [[SHL]], <i32 -1073741824, i32 -1073741824> +; CHECK-NEXT: ret <2 x i32> [[MASK]] +; + %srem = srem <2 x i32> %a0, <i32 4, i32 4> + %shl = shl <2 x i32> %srem, %a1 + %mask = and <2 x i32> %shl, <i32 -1073741824, i32 -1073741824> + ret <2 x i32> %mask +} + define i16 @sext_shl_trunc_same_size(i16 %x, i32 %y) { ; CHECK-LABEL: @sext_shl_trunc_same_size( ; CHECK-NEXT: 
[[CONV1:%.*]] = zext i16 [[X:%.*]] to i32 |