aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Analysis/ConstantFolding.cpp
diff options
context:
space:
mode:
authorLewis Crawford <lcrawford@nvidia.com>2025-01-16 14:38:51 +0000
committerGitHub <noreply@github.com>2025-01-16 14:38:51 +0000
commitcea92446ac289dc013e6253cb84445981010d08a (patch)
treed667fda0de72070d646d5b24be08b225fade5da4 /llvm/lib/Analysis/ConstantFolding.cpp
parent7dd34baf5505d689161c3a8678322a394d7a2929 (diff)
downloadllvm-cea92446ac289dc013e6253cb84445981010d08a.zip
llvm-cea92446ac289dc013e6253cb84445981010d08a.tar.gz
llvm-cea92446ac289dc013e6253cb84445981010d08a.tar.bz2
[NVPTX] Constant fold NVVM fmin and fmax (#121966)
Add constant-folding for nvvm float/double fmin + fmax intrinsics, including all combinations of xorsign.abs, nan-propagation, and ftz.
Diffstat (limited to 'llvm/lib/Analysis/ConstantFolding.cpp')
-rw-r--r--llvm/lib/Analysis/ConstantFolding.cpp139
1 files changed, 136 insertions, 3 deletions
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index ecdc841..3e87ea0 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1689,6 +1689,28 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
case Intrinsic::x86_avx512_cvttsd2usi64:
return !Call->isStrictFP();
+ // NVVM FMax intrinsics
+ case Intrinsic::nvvm_fmax_d:
+ case Intrinsic::nvvm_fmax_f:
+ case Intrinsic::nvvm_fmax_ftz_f:
+ case Intrinsic::nvvm_fmax_ftz_nan_f:
+ case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
+ case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
+ case Intrinsic::nvvm_fmax_nan_f:
+ case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
+ case Intrinsic::nvvm_fmax_xorsign_abs_f:
+
+ // NVVM FMin intrinsics
+ case Intrinsic::nvvm_fmin_d:
+ case Intrinsic::nvvm_fmin_f:
+ case Intrinsic::nvvm_fmin_ftz_f:
+ case Intrinsic::nvvm_fmin_ftz_nan_f:
+ case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
+ case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
+ case Intrinsic::nvvm_fmin_nan_f:
+ case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
+ case Intrinsic::nvvm_fmin_xorsign_abs_f:
+
// NVVM float/double to int32/uint32 conversion intrinsics
case Intrinsic::nvvm_f2i_rm:
case Intrinsic::nvvm_f2i_rn:
@@ -2431,9 +2453,10 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
if (U.isNaN())
return ConstantInt::get(Ty, 0);
- APFloat::roundingMode RMode = nvvm::IntrinsicGetRoundingMode(IntrinsicID);
- bool IsFTZ = nvvm::IntrinsicShouldFTZ(IntrinsicID);
- bool IsSigned = nvvm::IntrinsicConvertsToSignedInteger(IntrinsicID);
+ APFloat::roundingMode RMode =
+ nvvm::GetFPToIntegerRoundingMode(IntrinsicID);
+ bool IsFTZ = nvvm::FPToIntegerIntrinsicShouldFTZ(IntrinsicID);
+ bool IsSigned = nvvm::FPToIntegerIntrinsicResultIsSigned(IntrinsicID);
APSInt ResInt(Ty->getIntegerBitWidth(), !IsSigned);
auto FloatToRound = IsFTZ ? FTZPreserveSign(U) : U;
@@ -2892,12 +2915,49 @@ static Constant *ConstantFoldIntrinsicCall2(Intrinsic::ID IntrinsicID, Type *Ty,
case Intrinsic::minnum:
case Intrinsic::maximum:
case Intrinsic::minimum:
+ case Intrinsic::nvvm_fmax_d:
+ case Intrinsic::nvvm_fmin_d:
// If one argument is undef, return the other argument.
if (IsOp0Undef)
return Operands[1];
if (IsOp1Undef)
return Operands[0];
break;
+
+ case Intrinsic::nvvm_fmax_f:
+ case Intrinsic::nvvm_fmax_ftz_f:
+ case Intrinsic::nvvm_fmax_ftz_nan_f:
+ case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
+ case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
+ case Intrinsic::nvvm_fmax_nan_f:
+ case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
+ case Intrinsic::nvvm_fmax_xorsign_abs_f:
+
+ case Intrinsic::nvvm_fmin_f:
+ case Intrinsic::nvvm_fmin_ftz_f:
+ case Intrinsic::nvvm_fmin_ftz_nan_f:
+ case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
+ case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
+ case Intrinsic::nvvm_fmin_nan_f:
+ case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
+ case Intrinsic::nvvm_fmin_xorsign_abs_f:
+ // If one arg is undef, the other arg can be returned only if it is
+ // constant, as we may need to flush it to sign-preserving zero or
+ // canonicalize the NaN.
+ if (!IsOp0Undef && !IsOp1Undef)
+ break;
+ if (auto *Op = dyn_cast<ConstantFP>(Operands[IsOp0Undef ? 1 : 0])) {
+ if (Op->isNaN()) {
+ APInt NVCanonicalNaN(32, 0x7fffffff);
+ return ConstantFP::get(
+ Ty, APFloat(Ty->getFltSemantics(), NVCanonicalNaN));
+ }
+ if (nvvm::FMinFMaxShouldFTZ(IntrinsicID))
+ return ConstantFP::get(Ty, FTZPreserveSign(Op->getValueAPF()));
+ else
+ return Op;
+ }
+ break;
}
}
@@ -2955,6 +3015,79 @@ static Constant *ConstantFoldIntrinsicCall2(Intrinsic::ID IntrinsicID, Type *Ty,
return ConstantFP::get(Ty->getContext(), minimum(Op1V, Op2V));
case Intrinsic::maximum:
return ConstantFP::get(Ty->getContext(), maximum(Op1V, Op2V));
+
+ case Intrinsic::nvvm_fmax_d:
+ case Intrinsic::nvvm_fmax_f:
+ case Intrinsic::nvvm_fmax_ftz_f:
+ case Intrinsic::nvvm_fmax_ftz_nan_f:
+ case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
+ case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
+ case Intrinsic::nvvm_fmax_nan_f:
+ case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
+ case Intrinsic::nvvm_fmax_xorsign_abs_f:
+
+ case Intrinsic::nvvm_fmin_d:
+ case Intrinsic::nvvm_fmin_f:
+ case Intrinsic::nvvm_fmin_ftz_f:
+ case Intrinsic::nvvm_fmin_ftz_nan_f:
+ case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
+ case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
+ case Intrinsic::nvvm_fmin_nan_f:
+ case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
+ case Intrinsic::nvvm_fmin_xorsign_abs_f: {
+
+ bool ShouldCanonicalizeNaNs = !(IntrinsicID == Intrinsic::nvvm_fmax_d ||
+ IntrinsicID == Intrinsic::nvvm_fmin_d);
+ bool IsFTZ = nvvm::FMinFMaxShouldFTZ(IntrinsicID);
+ bool IsNaNPropagating = nvvm::FMinFMaxPropagatesNaNs(IntrinsicID);
+ bool IsXorSignAbs = nvvm::FMinFMaxIsXorSignAbs(IntrinsicID);
+
+ APFloat A = IsFTZ ? FTZPreserveSign(Op1V) : Op1V;
+ APFloat B = IsFTZ ? FTZPreserveSign(Op2V) : Op2V;
+
+ bool XorSign = false;
+ if (IsXorSignAbs) {
+ XorSign = A.isNegative() ^ B.isNegative();
+ A = abs(A);
+ B = abs(B);
+ }
+
+ bool IsFMax = false;
+ switch (IntrinsicID) {
+ case Intrinsic::nvvm_fmax_d:
+ case Intrinsic::nvvm_fmax_f:
+ case Intrinsic::nvvm_fmax_ftz_f:
+ case Intrinsic::nvvm_fmax_ftz_nan_f:
+ case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
+ case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
+ case Intrinsic::nvvm_fmax_nan_f:
+ case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
+ case Intrinsic::nvvm_fmax_xorsign_abs_f:
+ IsFMax = true;
+ break;
+ }
+ APFloat Res = IsFMax ? maximum(A, B) : minimum(A, B);
+
+ if (ShouldCanonicalizeNaNs) {
+ APFloat NVCanonicalNaN(Res.getSemantics(), APInt(32, 0x7fffffff));
+ if (A.isNaN() && B.isNaN())
+ return ConstantFP::get(Ty, NVCanonicalNaN);
+ else if (IsNaNPropagating && (A.isNaN() || B.isNaN()))
+ return ConstantFP::get(Ty, NVCanonicalNaN);
+ }
+
+ if (A.isNaN() && B.isNaN())
+ return Operands[1];
+ else if (A.isNaN())
+ Res = B;
+ else if (B.isNaN())
+ Res = A;
+
+ if (IsXorSignAbs && XorSign != Res.isNegative())
+ Res.changeSign();
+
+ return ConstantFP::get(Ty->getContext(), Res);
+ }
}
if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())