diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp | 42 |
1 files changed, 29 insertions, 13 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index c49aab8..a6cef52 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -1193,19 +1193,35 @@ int AMDGPUCodeGenPrepareImpl::getDivNumBits(BinaryOperator &I, Value *Num,
                                             Value *Den, unsigned AtLeast,
                                             bool IsSigned) const {
   const DataLayout &DL = Mod->getDataLayout();
-  unsigned LHSSignBits = ComputeNumSignBits(Num, DL, 0, AC, &I);
-  if (LHSSignBits < AtLeast)
-    return -1;
-
-  unsigned RHSSignBits = ComputeNumSignBits(Den, DL, 0, AC, &I);
-  if (RHSSignBits < AtLeast)
-    return -1;
-
-  unsigned SignBits = std::min(LHSSignBits, RHSSignBits);
-  unsigned DivBits = Num->getType()->getScalarSizeInBits() - SignBits;
-  if (IsSigned)
-    ++DivBits;
-  return DivBits;
+  if (IsSigned) {
+    unsigned LHSSignBits = ComputeNumSignBits(Num, DL, 0, AC, &I);
+    if (LHSSignBits < AtLeast)
+      return -1;
+
+    unsigned RHSSignBits = ComputeNumSignBits(Den, DL, 0, AC, &I);
+    if (RHSSignBits < AtLeast)
+      return -1;
+
+    unsigned SignBits = std::min(LHSSignBits, RHSSignBits);
+    unsigned DivBits = Num->getType()->getScalarSizeInBits() - SignBits;
+    return DivBits + 1;
+  } else {
+    KnownBits Known = computeKnownBits(Num, DL, 0, AC, &I);
+    // We know all bits are used for division for Num or Den in range
+    // (SignedMax, UnsignedMax]
+    if (Known.isNegative() || !Known.isNonNegative())
+      return -1;
+    unsigned LHSSignBits = Known.countMinLeadingZeros();
+
+    Known = computeKnownBits(Den, DL, 0, AC, &I);
+    if (Known.isNegative() || !Known.isNonNegative())
+      return -1;
+    unsigned RHSSignBits = Known.countMinLeadingZeros();
+
+    unsigned SignBits = std::min(LHSSignBits, RHSSignBits);
+    unsigned DivBits = Num->getType()->getScalarSizeInBits() - SignBits;
+    return DivBits;
+  }
 }
 
 // The fractional part of a float is enough to accurately represent up to