diff options
Diffstat (limited to 'llvm/lib/Analysis/ConstantFolding.cpp')
-rw-r--r-- | llvm/lib/Analysis/ConstantFolding.cpp | 179 |
1 file changed, 169 insertions, 10 deletions
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 9c1c2c6..759c553 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -929,12 +929,11 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
     if (!AllConstantInt)
       break;
 
-    // TODO: Try to intersect two inrange attributes?
-    if (!InRange) {
-      InRange = GEP->getInRange();
-      if (InRange)
-        // Adjust inrange by offset until now.
-        InRange = InRange->sextOrTrunc(BitWidth).subtract(Offset);
+    // Adjust inrange offset and intersect inrange attributes
+    if (auto GEPRange = GEP->getInRange()) {
+      auto AdjustedGEPRange = GEPRange->sextOrTrunc(BitWidth).subtract(Offset);
+      InRange =
+          InRange ? InRange->intersectWith(AdjustedGEPRange) : AdjustedGEPRange;
     }
 
     Ptr = cast<Constant>(GEP->getOperand(0));
@@ -1801,6 +1800,44 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
   case Intrinsic::nvvm_d2ull_rn:
   case Intrinsic::nvvm_d2ull_rp:
   case Intrinsic::nvvm_d2ull_rz:
+
+  // NVVM math intrinsics:
+  case Intrinsic::nvvm_ceil_d:
+  case Intrinsic::nvvm_ceil_f:
+  case Intrinsic::nvvm_ceil_ftz_f:
+
+  case Intrinsic::nvvm_fabs:
+  case Intrinsic::nvvm_fabs_ftz:
+
+  case Intrinsic::nvvm_floor_d:
+  case Intrinsic::nvvm_floor_f:
+  case Intrinsic::nvvm_floor_ftz_f:
+
+  case Intrinsic::nvvm_rcp_rm_d:
+  case Intrinsic::nvvm_rcp_rm_f:
+  case Intrinsic::nvvm_rcp_rm_ftz_f:
+  case Intrinsic::nvvm_rcp_rn_d:
+  case Intrinsic::nvvm_rcp_rn_f:
+  case Intrinsic::nvvm_rcp_rn_ftz_f:
+  case Intrinsic::nvvm_rcp_rp_d:
+  case Intrinsic::nvvm_rcp_rp_f:
+  case Intrinsic::nvvm_rcp_rp_ftz_f:
+  case Intrinsic::nvvm_rcp_rz_d:
+  case Intrinsic::nvvm_rcp_rz_f:
+  case Intrinsic::nvvm_rcp_rz_ftz_f:
+
+  case Intrinsic::nvvm_round_d:
+  case Intrinsic::nvvm_round_f:
+  case Intrinsic::nvvm_round_ftz_f:
+
+  case Intrinsic::nvvm_saturate_d:
+  case Intrinsic::nvvm_saturate_f:
+  case Intrinsic::nvvm_saturate_ftz_f:
+
+  case Intrinsic::nvvm_sqrt_f:
+  case Intrinsic::nvvm_sqrt_rn_d:
+  case Intrinsic::nvvm_sqrt_rn_f:
+  case Intrinsic::nvvm_sqrt_rn_ftz_f:
     return !Call->isStrictFP();
 
   // Sign operations are actually bitwise operations, they do not raise
@@ -1818,6 +1855,7 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
   case Intrinsic::nearbyint:
   case Intrinsic::rint:
   case Intrinsic::canonicalize:
+
   // Constrained intrinsics can be folded if FP environment is known
   // to compiler.
   case Intrinsic::experimental_constrained_fma:
@@ -1971,16 +2009,49 @@ static APFloat FTZPreserveSign(const APFloat &V) {
   return V;
 }
 
-Constant *ConstantFoldFP(double (*NativeFP)(double), const APFloat &V,
-                         Type *Ty) {
+static APFloat FlushToPositiveZero(const APFloat &V) {
+  if (V.isDenormal())
+    return APFloat::getZero(V.getSemantics(), false);
+  return V;
+}
+
+static APFloat FlushWithDenormKind(const APFloat &V,
+                                   DenormalMode::DenormalModeKind DenormKind) {
+  assert(DenormKind != DenormalMode::DenormalModeKind::Invalid &&
+         DenormKind != DenormalMode::DenormalModeKind::Dynamic);
+  switch (DenormKind) {
+  case DenormalMode::DenormalModeKind::IEEE:
+    return V;
+  case DenormalMode::DenormalModeKind::PreserveSign:
+    return FTZPreserveSign(V);
+  case DenormalMode::DenormalModeKind::PositiveZero:
+    return FlushToPositiveZero(V);
+  default:
+    llvm_unreachable("Invalid denormal mode!");
+  }
+}
+
+Constant *ConstantFoldFP(double (*NativeFP)(double), const APFloat &V, Type *Ty,
+                         DenormalMode DenormMode = DenormalMode::getIEEE()) {
+  if (!DenormMode.isValid() ||
+      DenormMode.Input == DenormalMode::DenormalModeKind::Dynamic ||
+      DenormMode.Output == DenormalMode::DenormalModeKind::Dynamic)
+    return nullptr;
+
   llvm_fenv_clearexcept();
-  double Result = NativeFP(V.convertToDouble());
+  auto Input = FlushWithDenormKind(V, DenormMode.Input);
+  double Result = NativeFP(Input.convertToDouble());
   if (llvm_fenv_testexcept()) {
     llvm_fenv_clearexcept();
     return nullptr;
   }
 
-  return GetConstantFoldFPValue(Result, Ty);
+  Constant *Output = GetConstantFoldFPValue(Result, Ty);
+  if (DenormMode.Output == DenormalMode::DenormalModeKind::IEEE)
+    return Output;
+  const auto *CFP = static_cast<ConstantFP *>(Output);
+  const auto Res = FlushWithDenormKind(CFP->getValueAPF(), DenormMode.Output);
+  return ConstantFP::get(Ty->getContext(), Res);
 }
 
 #if defined(HAS_IEE754_FLOAT128) && defined(HAS_LOGF128)
@@ -2550,6 +2621,94 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
         return ConstantFoldFP(atan, APF, Ty);
       case Intrinsic::sqrt:
         return ConstantFoldFP(sqrt, APF, Ty);
+
+      // NVVM Intrinsics:
+      case Intrinsic::nvvm_ceil_ftz_f:
+      case Intrinsic::nvvm_ceil_f:
+      case Intrinsic::nvvm_ceil_d:
+        return ConstantFoldFP(
+            ceil, APF, Ty,
+            nvvm::GetNVVMDenromMode(
+                nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID)));
+
+      case Intrinsic::nvvm_fabs_ftz:
+      case Intrinsic::nvvm_fabs:
+        return ConstantFoldFP(
+            fabs, APF, Ty,
+            nvvm::GetNVVMDenromMode(
+                nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID)));
+
+      case Intrinsic::nvvm_floor_ftz_f:
+      case Intrinsic::nvvm_floor_f:
+      case Intrinsic::nvvm_floor_d:
+        return ConstantFoldFP(
+            floor, APF, Ty,
+            nvvm::GetNVVMDenromMode(
+                nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID)));
+
+      case Intrinsic::nvvm_rcp_rm_ftz_f:
+      case Intrinsic::nvvm_rcp_rn_ftz_f:
+      case Intrinsic::nvvm_rcp_rp_ftz_f:
+      case Intrinsic::nvvm_rcp_rz_ftz_f:
+      case Intrinsic::nvvm_rcp_rm_d:
+      case Intrinsic::nvvm_rcp_rm_f:
+      case Intrinsic::nvvm_rcp_rn_d:
+      case Intrinsic::nvvm_rcp_rn_f:
+      case Intrinsic::nvvm_rcp_rp_d:
+      case Intrinsic::nvvm_rcp_rp_f:
+      case Intrinsic::nvvm_rcp_rz_d:
+      case Intrinsic::nvvm_rcp_rz_f: {
+        APFloat::roundingMode RoundMode = nvvm::GetRCPRoundingMode(IntrinsicID);
+        bool IsFTZ = nvvm::RCPShouldFTZ(IntrinsicID);
+
+        auto Denominator = IsFTZ ? FTZPreserveSign(APF) : APF;
+        APFloat Res = APFloat::getOne(APF.getSemantics());
+        APFloat::opStatus Status = Res.divide(Denominator, RoundMode);
+
+        if (Status == APFloat::opOK || Status == APFloat::opInexact) {
+          if (IsFTZ)
+            Res = FTZPreserveSign(Res);
+          return ConstantFP::get(Ty->getContext(), Res);
+        }
+        return nullptr;
+      }
+
+      case Intrinsic::nvvm_round_ftz_f:
+      case Intrinsic::nvvm_round_f:
+      case Intrinsic::nvvm_round_d: {
+        // Use APFloat implementation instead of native libm call, as some
+        // implementations (e.g. on PPC) do not preserve the sign of negative 0.
+        bool IsFTZ = nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID);
+        auto V = IsFTZ ? FTZPreserveSign(APF) : APF;
+        V.roundToIntegral(APFloat::rmNearestTiesToAway);
+        return ConstantFP::get(Ty->getContext(), V);
+      }
+
+      case Intrinsic::nvvm_saturate_ftz_f:
+      case Intrinsic::nvvm_saturate_d:
+      case Intrinsic::nvvm_saturate_f: {
+        bool IsFTZ = nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID);
+        auto V = IsFTZ ? FTZPreserveSign(APF) : APF;
+        if (V.isNegative() || V.isZero() || V.isNaN())
+          return ConstantFP::getZero(Ty);
+        APFloat One = APFloat::getOne(APF.getSemantics());
+        if (V > One)
+          return ConstantFP::get(Ty->getContext(), One);
+        return ConstantFP::get(Ty->getContext(), APF);
+      }
+
+      case Intrinsic::nvvm_sqrt_rn_ftz_f:
+      case Intrinsic::nvvm_sqrt_f:
+      case Intrinsic::nvvm_sqrt_rn_d:
+      case Intrinsic::nvvm_sqrt_rn_f:
+        if (APF.isNegative())
+          return nullptr;
+        return ConstantFoldFP(
+            sqrt, APF, Ty,
+            nvvm::GetNVVMDenromMode(
+                nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID)));
+
+      // AMDGCN Intrinsics:
       case Intrinsic::amdgcn_cos:
       case Intrinsic::amdgcn_sin: {
         double V = getValueAsDouble(Op);