diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp | 39 |
1 files changed, 29 insertions, 10 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp index 75e20c7..e02ef56 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp @@ -1195,18 +1195,37 @@ static Value* getMulHu(IRBuilder<> &Builder, Value *LHS, Value *RHS) { int AMDGPUCodeGenPrepareImpl::getDivNumBits(BinaryOperator &I, Value *Num, Value *Den, unsigned AtLeast, bool IsSigned) const { - unsigned LHSSignBits = ComputeNumSignBits(Num, DL, 0, AC, &I); - if (LHSSignBits < AtLeast) - return -1; + assert(Num->getType()->getScalarSizeInBits() == + Den->getType()->getScalarSizeInBits()); + unsigned SSBits = Num->getType()->getScalarSizeInBits(); + if (IsSigned) { + unsigned RHSSignBits = ComputeNumSignBits(Den, DL, 0, AC, &I); + if (RHSSignBits < AtLeast) + return -1; + + unsigned LHSSignBits = ComputeNumSignBits(Num, DL, 0, AC, &I); + if (LHSSignBits < AtLeast) + return -1; + + unsigned SignBits = std::min(LHSSignBits, RHSSignBits); + unsigned DivBits = SSBits - SignBits + 1; + return DivBits; // a SignBit needs to be reserved for shrinking + } + + // All bits are used for unsigned division for Num or Den in range + // (SignedMax, UnsignedMax]. + KnownBits Known = computeKnownBits(Den, DL, 0, AC, &I); + if (Known.isNegative() || !Known.isNonNegative()) + return SSBits; + unsigned RHSSignBits = Known.countMinLeadingZeros(); - unsigned RHSSignBits = ComputeNumSignBits(Den, DL, 0, AC, &I); - if (RHSSignBits < AtLeast) - return -1; + Known = computeKnownBits(Num, DL, 0, AC, &I); + if (Known.isNegative() || !Known.isNonNegative()) + return SSBits; + unsigned LHSSignBits = Known.countMinLeadingZeros(); unsigned SignBits = std::min(LHSSignBits, RHSSignBits); - unsigned DivBits = Num->getType()->getScalarSizeInBits() - SignBits; - if (IsSigned) - ++DivBits; + unsigned DivBits = SSBits - SignBits; return DivBits; } @@ -1220,7 +1239,7 @@ Value *AMDGPUCodeGenPrepareImpl::expandDivRem24(IRBuilder<> &Builder, // If Num bits <= 24, assume 0 signbits. unsigned AtLeast = (SSBits <= 24) ? 0 : (SSBits - 24 + IsSigned); int DivBits = getDivNumBits(I, Num, Den, AtLeast, IsSigned); - if (DivBits == -1) + if (DivBits == -1 || DivBits > 24) return nullptr; return expandDivRem24Impl(Builder, I, Num, Den, DivBits, IsDiv, IsSigned); } |