diff options
author | choikwa <5455710+choikwa@users.noreply.github.com> | 2024-12-12 15:24:34 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-12-12 15:24:34 -0500 |
commit | 463e93b95f0887145b51edb81b770eeb4463abc5 (patch) | |
tree | 0245b6324cfab0a6c86ed98d4c3a184fc57215e6 /llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp | |
parent | afcb7d4a2eab51977497e43ce6539d2b0ca01071 (diff) | |
download | llvm-463e93b95f0887145b51edb81b770eeb4463abc5.zip llvm-463e93b95f0887145b51edb81b770eeb4463abc5.tar.gz llvm-463e93b95f0887145b51edb81b770eeb4463abc5.tar.bz2 |
Reapply [AMDGPU] prevent shrinking udiv/urem if either operand exceeds signed max (#119325)
This reverts commit 254d206ee2a337cb38ba347c896f7c6a14c7f218.
Also added a fix in expandDivRem24 to bail out when the bit count returned by getDivNumBits exceeds 24.
Original commit & msg:
ce6e955ac374f2b86cbbb73b2f32174dffd85f25.
Handle the signed and unsigned paths differently in getDivNumBits. Using
computeKnownBits, this rejects shrinking unsigned div/rem if either operand
may exceed the signed max, since we know NumSignBits will always be 0 in that case.
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp | 39 |
1 files changed, 29 insertions, 10 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 75e20c7..e02ef56 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -1195,18 +1195,37 @@ static Value* getMulHu(IRBuilder<> &Builder, Value *LHS, Value *RHS) {
 int AMDGPUCodeGenPrepareImpl::getDivNumBits(BinaryOperator &I, Value *Num,
                                             Value *Den, unsigned AtLeast,
                                             bool IsSigned) const {
-  unsigned LHSSignBits = ComputeNumSignBits(Num, DL, 0, AC, &I);
-  if (LHSSignBits < AtLeast)
-    return -1;
+  assert(Num->getType()->getScalarSizeInBits() ==
+         Den->getType()->getScalarSizeInBits());
+  unsigned SSBits = Num->getType()->getScalarSizeInBits();
+  if (IsSigned) {
+    unsigned RHSSignBits = ComputeNumSignBits(Den, DL, 0, AC, &I);
+    if (RHSSignBits < AtLeast)
+      return -1;
+
+    unsigned LHSSignBits = ComputeNumSignBits(Num, DL, 0, AC, &I);
+    if (LHSSignBits < AtLeast)
+      return -1;
+
+    unsigned SignBits = std::min(LHSSignBits, RHSSignBits);
+    unsigned DivBits = SSBits - SignBits + 1;
+    return DivBits; // a SignBit needs to be reserved for shrinking
+  }
+
+  // All bits are used for unsigned division for Num or Den in range
+  // (SignedMax, UnsignedMax].
+  KnownBits Known = computeKnownBits(Den, DL, 0, AC, &I);
+  if (Known.isNegative() || !Known.isNonNegative())
+    return SSBits;
+  unsigned RHSSignBits = Known.countMinLeadingZeros();
 
-  unsigned RHSSignBits = ComputeNumSignBits(Den, DL, 0, AC, &I);
-  if (RHSSignBits < AtLeast)
-    return -1;
+  Known = computeKnownBits(Num, DL, 0, AC, &I);
+  if (Known.isNegative() || !Known.isNonNegative())
+    return SSBits;
+  unsigned LHSSignBits = Known.countMinLeadingZeros();
 
   unsigned SignBits = std::min(LHSSignBits, RHSSignBits);
-  unsigned DivBits = Num->getType()->getScalarSizeInBits() - SignBits;
-  if (IsSigned)
-    ++DivBits;
+  unsigned DivBits = SSBits - SignBits;
   return DivBits;
 }
 
@@ -1220,7 +1239,7 @@ Value *AMDGPUCodeGenPrepareImpl::expandDivRem24(IRBuilder<> &Builder,
   // If Num bits <= 24, assume 0 signbits.
   unsigned AtLeast = (SSBits <= 24) ? 0 : (SSBits - 24 + IsSigned);
   int DivBits = getDivNumBits(I, Num, Den, AtLeast, IsSigned);
-  if (DivBits == -1)
+  if (DivBits == -1 || DivBits > 24)
     return nullptr;
   return expandDivRem24Impl(Builder, I, Num, Den, DivBits, IsDiv, IsSigned);
 }