diff options
author | Noah Goldstein <goldstein.w.n@gmail.com> | 2024-03-07 16:19:22 -0600 |
---|---|---|
committer | Noah Goldstein <goldstein.w.n@gmail.com> | 2024-03-09 11:06:02 -0600 |
commit | 8d976c7f20fe8d92fe6f54af411594e15fac25ae (patch) | |
tree | db28dbb3093f2b1413f5fc640839061c9c2dafac | |
parent | 938b9204684222d192a3f817da0c33076ed813e2 (diff) | |
download | llvm-8d976c7f20fe8d92fe6f54af411594e15fac25ae.zip llvm-8d976c7f20fe8d92fe6f54af411594e15fac25ae.tar.gz llvm-8d976c7f20fe8d92fe6f54af411594e15fac25ae.tar.bz2 |
[InstCombine] Make `(binop ({s|u}itofp),({s|u}itofp))` transform more flexible to mismatched signs
Instead of taking the sign of the cast operation as the required sign
for the transform, only force a sign if an operation is maybe
negative.
This gives us more flexibility when checking if the floats are safely
convertible to integers.
Closes #84389
4 files changed, 78 insertions, 62 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index 57148d7..6a1ef6e 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -380,6 +380,10 @@ private: Instruction *foldBitcastExtElt(ExtractElementInst &ExtElt); Instruction *foldCastedBitwiseLogic(BinaryOperator &I); Instruction *foldFBinOpOfIntCasts(BinaryOperator &I); + // Should only be called by `foldFBinOpOfIntCasts`. + Instruction *foldFBinOpOfIntCastsFromSign( + BinaryOperator &BO, bool OpsFromSigned, std::array<Value *, 2> IntOps, + Constant *Op1FpC, SmallVectorImpl<WithCache<const Value *>> &OpsKnown); Instruction *foldBinopOfSextBoolToSelect(BinaryOperator &I); Instruction *narrowBinOp(TruncInst &Trunc); Instruction *narrowMaskedBinOp(BinaryOperator &And); diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index f3a740c..1a83180 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1406,41 +1406,27 @@ Value *InstCombinerImpl::dyn_castNegVal(Value *V) const { // -> ({s|u}itofp (int_binop x, y)) // 2) (fp_binop ({s|u}itofp x), FpC) // -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC))) -Instruction *InstCombinerImpl::foldFBinOpOfIntCasts(BinaryOperator &BO) { - Value *IntOps[2] = {nullptr, nullptr}; - Constant *Op1FpC = nullptr; - - // Check for: - // 1) (binop ({s|u}itofp x), ({s|u}itofp y)) - // 2) (binop ({s|u}itofp x), FpC) - if (!match(BO.getOperand(0), m_SIToFP(m_Value(IntOps[0]))) && - !match(BO.getOperand(0), m_UIToFP(m_Value(IntOps[0])))) - return nullptr; - - if (!match(BO.getOperand(1), m_Constant(Op1FpC)) && - !match(BO.getOperand(1), m_SIToFP(m_Value(IntOps[1]))) && - !match(BO.getOperand(1), m_UIToFP(m_Value(IntOps[1])))) - return nullptr; +// +// Assuming the sign of the cast for x/y is 
`OpsFromSigned`. +Instruction *InstCombinerImpl::foldFBinOpOfIntCastsFromSign( + BinaryOperator &BO, bool OpsFromSigned, std::array<Value *, 2> IntOps, + Constant *Op1FpC, SmallVectorImpl<WithCache<const Value *>> &OpsKnown) { Type *FPTy = BO.getType(); Type *IntTy = IntOps[0]->getType(); - // Do we have signed casts? - bool OpsFromSigned = isa<SIToFPInst>(BO.getOperand(0)); - unsigned IntSz = IntTy->getScalarSizeInBits(); // This is the maximum number of inuse bits by the integer where the int -> fp // casts are exact. unsigned MaxRepresentableBits = APFloat::semanticsPrecision(FPTy->getScalarType()->getFltSemantics()); - // Cache KnownBits a bit to potentially save some analysis. - WithCache<const Value *> OpsKnown[2] = {IntOps[0], IntOps[1]}; - // Preserve known number of leading bits. This can allow us to trivial nsw/nuw // checks later on. unsigned NumUsedLeadingBits[2] = {IntSz, IntSz}; + // NB: This only comes up if OpsFromSigned is true, so there is no need to + // cache if between calls to `foldFBinOpOfIntCastsFromSign`. auto IsNonZero = [&](unsigned OpNo) -> bool { if (OpsKnown[OpNo].hasKnownBits() && OpsKnown[OpNo].getKnownBits(SQ).isNonZero()) @@ -1449,14 +1435,19 @@ Instruction *InstCombinerImpl::foldFBinOpOfIntCasts(BinaryOperator &BO) { }; auto IsNonNeg = [&](unsigned OpNo) -> bool { - if (OpsKnown[OpNo].hasKnownBits() && - OpsKnown[OpNo].getKnownBits(SQ).isNonNegative()) - return true; - return isKnownNonNegative(IntOps[OpNo], SQ); + // NB: This matches the impl in ValueTracking, we just try to use cached + // knownbits here. If we ever start supporting WithCache for + // `isKnownNonNegative`, change this to an explicit call. + return OpsKnown[OpNo].getKnownBits(SQ).isNonNegative(); }; // Check if we know for certain that ({s|u}itofp op) is exact. auto IsValidPromotion = [&](unsigned OpNo) -> bool { + // Can we treat this operand as the desired sign? 
+ if (OpsFromSigned != isa<SIToFPInst>(BO.getOperand(OpNo)) && + !IsNonNeg(OpNo)) + return false; + // If fp precision >= bitwidth(op) then its exact. // NB: This is slightly conservative for `sitofp`. For signed conversion, we // can handle `MaxRepresentableBits == IntSz - 1` as the sign bit will be @@ -1509,13 +1500,6 @@ Instruction *InstCombinerImpl::foldFBinOpOfIntCasts(BinaryOperator &BO) { return nullptr; if (Op1FpC == nullptr) { - if (OpsFromSigned != isa<SIToFPInst>(BO.getOperand(1))) { - // If we have a signed + unsigned, see if we can treat both as signed - // (uitofp nneg x) == (sitofp nneg x). - if (OpsFromSigned ? !IsNonNeg(1) : !IsNonNeg(0)) - return nullptr; - OpsFromSigned = true; - } if (!IsValidPromotion(1)) return nullptr; } @@ -1574,6 +1558,39 @@ Instruction *InstCombinerImpl::foldFBinOpOfIntCasts(BinaryOperator &BO) { return new UIToFPInst(IntBinOp, FPTy); } +// Try to fold: +// 1) (fp_binop ({s|u}itofp x), ({s|u}itofp y)) +// -> ({s|u}itofp (int_binop x, y)) +// 2) (fp_binop ({s|u}itofp x), FpC) +// -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC))) +Instruction *InstCombinerImpl::foldFBinOpOfIntCasts(BinaryOperator &BO) { + std::array<Value *, 2> IntOps = {nullptr, nullptr}; + Constant *Op1FpC = nullptr; + // Check for: + // 1) (binop ({s|u}itofp x), ({s|u}itofp y)) + // 2) (binop ({s|u}itofp x), FpC) + if (!match(BO.getOperand(0), m_SIToFP(m_Value(IntOps[0]))) && + !match(BO.getOperand(0), m_UIToFP(m_Value(IntOps[0])))) + return nullptr; + + if (!match(BO.getOperand(1), m_Constant(Op1FpC)) && + !match(BO.getOperand(1), m_SIToFP(m_Value(IntOps[1]))) && + !match(BO.getOperand(1), m_UIToFP(m_Value(IntOps[1])))) + return nullptr; + + // Cache KnownBits a bit to potentially save some analysis. + SmallVector<WithCache<const Value *>, 2> OpsKnown = {IntOps[0], IntOps[1]}; + + // Try treating x/y as coming from both `uitofp` and `sitofp`. There are + // different constraints depending on the sign of the cast. 
+ // NB: `(uitofp nneg X)` == `(sitofp nneg X)`. + if (Instruction *R = foldFBinOpOfIntCastsFromSign(BO, /*OpsFromSigned=*/false, + IntOps, Op1FpC, OpsKnown)) + return R; + return foldFBinOpOfIntCastsFromSign(BO, /*OpsFromSigned=*/true, IntOps, + Op1FpC, OpsKnown); +} + /// A binop with a constant operand and a sign-extended boolean operand may be /// converted into a select of constants by applying the binary operation to /// the constant with the two possible values of the extended boolean (0 or -1). diff --git a/llvm/test/Transforms/InstCombine/add-sitofp.ll b/llvm/test/Transforms/InstCombine/add-sitofp.ll index 049db8c..2bdc808 100644 --- a/llvm/test/Transforms/InstCombine/add-sitofp.ll +++ b/llvm/test/Transforms/InstCombine/add-sitofp.ll @@ -6,7 +6,7 @@ define double @x(i32 %a, i32 %b) { ; CHECK-NEXT: [[M:%.*]] = lshr i32 [[A:%.*]], 24 ; CHECK-NEXT: [[N:%.*]] = and i32 [[M]], [[B:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i32 [[N]], 1 -; CHECK-NEXT: [[P:%.*]] = sitofp i32 [[TMP1]] to double +; CHECK-NEXT: [[P:%.*]] = uitofp i32 [[TMP1]] to double ; CHECK-NEXT: ret double [[P]] ; %m = lshr i32 %a, 24 @@ -20,7 +20,7 @@ define double @test(i32 %a) { ; CHECK-LABEL: @test( ; CHECK-NEXT: [[A_AND:%.*]] = and i32 [[A:%.*]], 1073741823 ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i32 [[A_AND]], 1 -; CHECK-NEXT: [[RES:%.*]] = sitofp i32 [[TMP1]] to double +; CHECK-NEXT: [[RES:%.*]] = uitofp i32 [[TMP1]] to double ; CHECK-NEXT: ret double [[RES]] ; ; Drop two highest bits to guarantee that %a + 1 doesn't overflow @@ -49,7 +49,7 @@ define double @test_2(i32 %a, i32 %b) { ; CHECK-NEXT: [[A_AND:%.*]] = and i32 [[A:%.*]], 1073741823 ; CHECK-NEXT: [[B_AND:%.*]] = and i32 [[B:%.*]], 1073741823 ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i32 [[A_AND]], [[B_AND]] -; CHECK-NEXT: [[RES:%.*]] = sitofp i32 [[TMP1]] to double +; CHECK-NEXT: [[RES:%.*]] = uitofp i32 [[TMP1]] to double ; CHECK-NEXT: ret double [[RES]] ; ; Drop two highest bits to guarantee that %a + %b doesn't overflow @@ 
-89,7 +89,7 @@ define float @test_3(i32 %a, i32 %b) { ; CHECK-NEXT: [[M:%.*]] = lshr i32 [[A:%.*]], 24 ; CHECK-NEXT: [[N:%.*]] = and i32 [[M]], [[B:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i32 [[N]], 1 -; CHECK-NEXT: [[P:%.*]] = sitofp i32 [[TMP1]] to float +; CHECK-NEXT: [[P:%.*]] = uitofp i32 [[TMP1]] to float ; CHECK-NEXT: ret float [[P]] ; %m = lshr i32 %a, 24 @@ -104,7 +104,7 @@ define <4 x double> @test_4(<4 x i32> %a, <4 x i32> %b) { ; CHECK-NEXT: [[A_AND:%.*]] = and <4 x i32> [[A:%.*]], <i32 1073741823, i32 1073741823, i32 1073741823, i32 1073741823> ; CHECK-NEXT: [[B_AND:%.*]] = and <4 x i32> [[B:%.*]], <i32 1073741823, i32 1073741823, i32 1073741823, i32 1073741823> ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw <4 x i32> [[A_AND]], [[B_AND]] -; CHECK-NEXT: [[RES:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x double> +; CHECK-NEXT: [[RES:%.*]] = uitofp <4 x i32> [[TMP1]] to <4 x double> ; CHECK-NEXT: ret <4 x double> [[RES]] ; ; Drop two highest bits to guarantee that %a + %b doesn't overflow diff --git a/llvm/test/Transforms/InstCombine/binop-itofp.ll b/llvm/test/Transforms/InstCombine/binop-itofp.ll index c91cef7..7d2b872 100644 --- a/llvm/test/Transforms/InstCombine/binop-itofp.ll +++ b/llvm/test/Transforms/InstCombine/binop-itofp.ll @@ -110,7 +110,7 @@ define half @test_ui_si_i8_add(i8 noundef %x_in, i8 noundef %y_in) { ; CHECK-NEXT: [[X:%.*]] = and i8 [[X_IN:%.*]], 63 ; CHECK-NEXT: [[Y:%.*]] = and i8 [[Y_IN:%.*]], 63 ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i8 [[X]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = sitofp i8 [[TMP1]] to half +; CHECK-NEXT: [[R:%.*]] = uitofp i8 [[TMP1]] to half ; CHECK-NEXT: ret half [[R]] ; %x = and i8 %x_in, 63 @@ -125,9 +125,8 @@ define half @test_ui_si_i8_add_overflow(i8 noundef %x_in, i8 noundef %y_in) { ; CHECK-LABEL: @test_ui_si_i8_add_overflow( ; CHECK-NEXT: [[X:%.*]] = and i8 [[X_IN:%.*]], 63 ; CHECK-NEXT: [[Y:%.*]] = and i8 [[Y_IN:%.*]], 65 -; CHECK-NEXT: [[XF:%.*]] = sitofp i8 [[X]] to half -; CHECK-NEXT: [[YF:%.*]] = uitofp 
i8 [[Y]] to half -; CHECK-NEXT: [[R:%.*]] = fadd half [[XF]], [[YF]] +; CHECK-NEXT: [[TMP1:%.*]] = add nuw i8 [[X]], [[Y]] +; CHECK-NEXT: [[R:%.*]] = uitofp i8 [[TMP1]] to half ; CHECK-NEXT: ret half [[R]] ; %x = and i8 %x_in, 63 @@ -152,9 +151,8 @@ define half @test_ui_ui_i8_sub_C(i8 noundef %x_in) { define half @test_ui_ui_i8_sub_C_fail_overflow(i8 noundef %x_in) { ; CHECK-LABEL: @test_ui_ui_i8_sub_C_fail_overflow( -; CHECK-NEXT: [[X:%.*]] = and i8 [[X_IN:%.*]], 127 -; CHECK-NEXT: [[XF:%.*]] = uitofp i8 [[X]] to half -; CHECK-NEXT: [[R:%.*]] = fadd half [[XF]], 0xHD800 +; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X_IN:%.*]], -128 +; CHECK-NEXT: [[R:%.*]] = sitofp i8 [[TMP1]] to half ; CHECK-NEXT: ret half [[R]] ; %x = and i8 %x_in, 127 @@ -212,8 +210,8 @@ define half @test_si_si_i8_sub_C(i8 noundef %x_in) { define half @test_si_si_i8_sub_C_fail_overflow(i8 noundef %x_in) { ; CHECK-LABEL: @test_si_si_i8_sub_C_fail_overflow( ; CHECK-NEXT: [[X:%.*]] = and i8 [[X_IN:%.*]], 65 -; CHECK-NEXT: [[XF:%.*]] = sitofp i8 [[X]] to half -; CHECK-NEXT: [[R:%.*]] = fadd half [[XF]], 0xH5400 +; CHECK-NEXT: [[TMP1:%.*]] = add nuw i8 [[X]], 64 +; CHECK-NEXT: [[R:%.*]] = uitofp i8 [[TMP1]] to half ; CHECK-NEXT: ret half [[R]] ; %x = and i8 %x_in, 65 @@ -242,9 +240,8 @@ define half @test_ui_si_i8_sub_fail_maybe_sign(i8 noundef %x_in, i8 noundef %y_i ; CHECK-LABEL: @test_ui_si_i8_sub_fail_maybe_sign( ; CHECK-NEXT: [[X:%.*]] = or i8 [[X_IN:%.*]], 64 ; CHECK-NEXT: [[Y:%.*]] = and i8 [[Y_IN:%.*]], 63 -; CHECK-NEXT: [[XF:%.*]] = uitofp i8 [[X]] to half -; CHECK-NEXT: [[YF:%.*]] = sitofp i8 [[Y]] to half -; CHECK-NEXT: [[R:%.*]] = fsub half [[XF]], [[YF]] +; CHECK-NEXT: [[TMP1:%.*]] = sub nuw nsw i8 [[X]], [[Y]] +; CHECK-NEXT: [[R:%.*]] = uitofp i8 [[TMP1]] to half ; CHECK-NEXT: ret half [[R]] ; %x = or i8 %x_in, 64 @@ -273,8 +270,8 @@ define half @test_ui_ui_i8_mul(i8 noundef %x_in, i8 noundef %y_in) { define half @test_ui_ui_i8_mul_C(i8 noundef %x_in) { ; CHECK-LABEL: @test_ui_ui_i8_mul_C( -; 
CHECK-NEXT: [[TMP1:%.*]] = shl i8 [[X_IN:%.*]], 4 -; CHECK-NEXT: [[R:%.*]] = uitofp i8 [[TMP1]] to half +; CHECK-NEXT: [[X:%.*]] = shl i8 [[X_IN:%.*]], 4 +; CHECK-NEXT: [[R:%.*]] = uitofp i8 [[X]] to half ; CHECK-NEXT: ret half [[R]] ; %x = and i8 %x_in, 15 @@ -368,7 +365,7 @@ define half @test_ui_si_i8_mul(i8 noundef %x_in, i8 noundef %y_in) { ; CHECK-NEXT: [[YY:%.*]] = and i8 [[Y_IN:%.*]], 7 ; CHECK-NEXT: [[Y:%.*]] = add nuw nsw i8 [[YY]], 1 ; CHECK-NEXT: [[TMP1:%.*]] = mul nuw nsw i8 [[X]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = sitofp i8 [[TMP1]] to half +; CHECK-NEXT: [[R:%.*]] = uitofp i8 [[TMP1]] to half ; CHECK-NEXT: ret half [[R]] ; %xx = and i8 %x_in, 6 @@ -386,9 +383,8 @@ define half @test_ui_si_i8_mul_fail_maybe_zero(i8 noundef %x_in, i8 noundef %y_i ; CHECK-NEXT: [[XX:%.*]] = and i8 [[X_IN:%.*]], 7 ; CHECK-NEXT: [[X:%.*]] = add nuw nsw i8 [[XX]], 1 ; CHECK-NEXT: [[Y:%.*]] = and i8 [[Y_IN:%.*]], 7 -; CHECK-NEXT: [[XF:%.*]] = sitofp i8 [[X]] to half -; CHECK-NEXT: [[YF:%.*]] = uitofp i8 [[Y]] to half -; CHECK-NEXT: [[R:%.*]] = fmul half [[XF]], [[YF]] +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw nsw i8 [[X]], [[Y]] +; CHECK-NEXT: [[R:%.*]] = uitofp i8 [[TMP1]] to half ; CHECK-NEXT: ret half [[R]] ; %xx = and i8 %x_in, 7 @@ -694,7 +690,7 @@ define half @test_ui_si_i16_mul(i16 noundef %x_in, i16 noundef %y_in) { ; CHECK-NEXT: [[YY:%.*]] = and i16 [[Y_IN:%.*]], 126 ; CHECK-NEXT: [[Y:%.*]] = or disjoint i16 [[YY]], 1 ; CHECK-NEXT: [[TMP1:%.*]] = mul nuw nsw i16 [[X]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = sitofp i16 [[TMP1]] to half +; CHECK-NEXT: [[R:%.*]] = uitofp i16 [[TMP1]] to half ; CHECK-NEXT: ret half [[R]] ; %xx = and i16 %x_in, 126 @@ -807,9 +803,8 @@ define half @test_ui_ui_i12_sub_fail_overflow(i12 noundef %x_in, i12 noundef %y_ ; CHECK-LABEL: @test_ui_ui_i12_sub_fail_overflow( ; CHECK-NEXT: [[X:%.*]] = and i12 [[X_IN:%.*]], 1023 ; CHECK-NEXT: [[Y:%.*]] = and i12 [[Y_IN:%.*]], 2047 -; CHECK-NEXT: [[XF:%.*]] = uitofp i12 [[X]] to half -; CHECK-NEXT: [[YF:%.*]] = 
uitofp i12 [[Y]] to half -; CHECK-NEXT: [[R:%.*]] = fsub half [[XF]], [[YF]] +; CHECK-NEXT: [[TMP1:%.*]] = sub nsw i12 [[X]], [[Y]] +; CHECK-NEXT: [[R:%.*]] = sitofp i12 [[TMP1]] to half ; CHECK-NEXT: ret half [[R]] ; %x = and i12 %x_in, 1023 @@ -984,7 +979,7 @@ define half @test_ui_si_i12_mul_nsw(i12 noundef %x_in, i12 noundef %y_in) { ; CHECK-NEXT: [[YY:%.*]] = and i12 [[Y_IN:%.*]], 30 ; CHECK-NEXT: [[Y:%.*]] = or disjoint i12 [[YY]], 1 ; CHECK-NEXT: [[TMP1:%.*]] = mul nuw nsw i12 [[X]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = sitofp i12 [[TMP1]] to half +; CHECK-NEXT: [[R:%.*]] = uitofp i12 [[TMP1]] to half ; CHECK-NEXT: ret half [[R]] ; %xx = and i12 %x_in, 31 @@ -1000,8 +995,8 @@ define half @test_ui_si_i12_mul_nsw(i12 noundef %x_in, i12 noundef %y_in) { define float @test_ui_add_with_signed_constant(i32 %shr.i) { ; CHECK-LABEL: @test_ui_add_with_signed_constant( ; CHECK-NEXT: [[AND_I:%.*]] = and i32 [[SHR_I:%.*]], 32767 -; CHECK-NEXT: [[SUB:%.*]] = uitofp i32 [[AND_I]] to float -; CHECK-NEXT: [[ADD:%.*]] = fadd float [[SUB]], -1.638300e+04 +; CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 [[AND_I]], -16383 +; CHECK-NEXT: [[ADD:%.*]] = sitofp i32 [[TMP1]] to float ; CHECK-NEXT: ret float [[ADD]] ; %and.i = and i32 %shr.i, 32767 |