diff options
author | Lewis Crawford <lcrawford@nvidia.com> | 2025-01-16 14:38:51 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-01-16 14:38:51 +0000 |
commit | cea92446ac289dc013e6253cb84445981010d08a (patch) | |
tree | d667fda0de72070d646d5b24be08b225fade5da4 /llvm | |
parent | 7dd34baf5505d689161c3a8678322a394d7a2929 (diff) | |
download | llvm-cea92446ac289dc013e6253cb84445981010d08a.zip llvm-cea92446ac289dc013e6253cb84445981010d08a.tar.gz llvm-cea92446ac289dc013e6253cb84445981010d08a.tar.bz2 |
[NVPTX] Constant fold NVVM fmin and fmax (#121966)
Add constant-folding for nvvm float/double fmin + fmax intrinsics,
including all combinations of xorsign.abs, nan-propagation, and ftz.
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/include/llvm/IR/NVVMIntrinsicUtils.h | 173 | ||||
-rw-r--r-- | llvm/lib/Analysis/ConstantFolding.cpp | 139 | ||||
-rw-r--r-- | llvm/test/Transforms/InstSimplify/const-fold-nvvm-fmin-fmax.ll | 918 |
3 files changed, 1222 insertions, 8 deletions
diff --git a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h index 8ca073b..ce794e2 100644 --- a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h +++ b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h @@ -38,9 +38,8 @@ enum class TMAReductionOp : uint8_t { XOR = 7, }; -inline bool IntrinsicShouldFTZ(Intrinsic::ID IntrinsicID) { +inline bool FPToIntegerIntrinsicShouldFTZ(Intrinsic::ID IntrinsicID) { switch (IntrinsicID) { - // Float to i32 / i64 conversion intrinsics: case Intrinsic::nvvm_f2i_rm_ftz: case Intrinsic::nvvm_f2i_rn_ftz: case Intrinsic::nvvm_f2i_rp_ftz: @@ -61,11 +60,53 @@ inline bool IntrinsicShouldFTZ(Intrinsic::ID IntrinsicID) { case Intrinsic::nvvm_f2ull_rp_ftz: case Intrinsic::nvvm_f2ull_rz_ftz: return true; + + case Intrinsic::nvvm_f2i_rm: + case Intrinsic::nvvm_f2i_rn: + case Intrinsic::nvvm_f2i_rp: + case Intrinsic::nvvm_f2i_rz: + + case Intrinsic::nvvm_f2ui_rm: + case Intrinsic::nvvm_f2ui_rn: + case Intrinsic::nvvm_f2ui_rp: + case Intrinsic::nvvm_f2ui_rz: + + case Intrinsic::nvvm_d2i_rm: + case Intrinsic::nvvm_d2i_rn: + case Intrinsic::nvvm_d2i_rp: + case Intrinsic::nvvm_d2i_rz: + + case Intrinsic::nvvm_d2ui_rm: + case Intrinsic::nvvm_d2ui_rn: + case Intrinsic::nvvm_d2ui_rp: + case Intrinsic::nvvm_d2ui_rz: + + case Intrinsic::nvvm_f2ll_rm: + case Intrinsic::nvvm_f2ll_rn: + case Intrinsic::nvvm_f2ll_rp: + case Intrinsic::nvvm_f2ll_rz: + + case Intrinsic::nvvm_f2ull_rm: + case Intrinsic::nvvm_f2ull_rn: + case Intrinsic::nvvm_f2ull_rp: + case Intrinsic::nvvm_f2ull_rz: + + case Intrinsic::nvvm_d2ll_rm: + case Intrinsic::nvvm_d2ll_rn: + case Intrinsic::nvvm_d2ll_rp: + case Intrinsic::nvvm_d2ll_rz: + + case Intrinsic::nvvm_d2ull_rm: + case Intrinsic::nvvm_d2ull_rn: + case Intrinsic::nvvm_d2ull_rp: + case Intrinsic::nvvm_d2ull_rz: + return false; } + llvm_unreachable("Checking FTZ flag for invalid f2i/d2i intrinsic"); return false; } -inline bool IntrinsicConvertsToSignedInteger(Intrinsic::ID IntrinsicID) { +inline bool FPToIntegerIntrinsicResultIsSigned(Intrinsic::ID IntrinsicID) { switch (IntrinsicID) { // f2i case Intrinsic::nvvm_f2i_rm: @@ -96,12 +137,44 @@ inline bool IntrinsicConvertsToSignedInteger(Intrinsic::ID IntrinsicID) { case Intrinsic::nvvm_d2ll_rp: case Intrinsic::nvvm_d2ll_rz: return true; + + // f2ui + case Intrinsic::nvvm_f2ui_rm: + case Intrinsic::nvvm_f2ui_rm_ftz: + case Intrinsic::nvvm_f2ui_rn: + case Intrinsic::nvvm_f2ui_rn_ftz: + case Intrinsic::nvvm_f2ui_rp: + case Intrinsic::nvvm_f2ui_rp_ftz: + case Intrinsic::nvvm_f2ui_rz: + case Intrinsic::nvvm_f2ui_rz_ftz: + // d2ui + case Intrinsic::nvvm_d2ui_rm: + case Intrinsic::nvvm_d2ui_rn: + case Intrinsic::nvvm_d2ui_rp: + case Intrinsic::nvvm_d2ui_rz: + // f2ull + case Intrinsic::nvvm_f2ull_rm: + case Intrinsic::nvvm_f2ull_rm_ftz: + case Intrinsic::nvvm_f2ull_rn: + case Intrinsic::nvvm_f2ull_rn_ftz: + case Intrinsic::nvvm_f2ull_rp: + case Intrinsic::nvvm_f2ull_rp_ftz: + case Intrinsic::nvvm_f2ull_rz: + case Intrinsic::nvvm_f2ull_rz_ftz: + // d2ull + case Intrinsic::nvvm_d2ull_rm: + case Intrinsic::nvvm_d2ull_rn: + case Intrinsic::nvvm_d2ull_rp: + case Intrinsic::nvvm_d2ull_rz: + return false; } + llvm_unreachable( + "Checking invalid f2i/d2i intrinsic for signed int conversion"); return false; } inline APFloat::roundingMode -IntrinsicGetRoundingMode(Intrinsic::ID IntrinsicID) { +GetFPToIntegerRoundingMode(Intrinsic::ID IntrinsicID) { switch (IntrinsicID) { // RM: case Intrinsic::nvvm_f2i_rm: @@ -167,10 +240,100 @@ IntrinsicGetRoundingMode(Intrinsic::ID IntrinsicID) { case Intrinsic::nvvm_d2ull_rz: return APFloat::rmTowardZero; } - llvm_unreachable("Invalid f2i/d2i rounding mode intrinsic"); + llvm_unreachable("Checking rounding mode for invalid f2i/d2i intrinsic"); return APFloat::roundingMode::Invalid; } +inline bool FMinFMaxShouldFTZ(Intrinsic::ID IntrinsicID) { + switch (IntrinsicID) { + case Intrinsic::nvvm_fmax_ftz_f: + case Intrinsic::nvvm_fmax_ftz_nan_f: + case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f: + case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f: + + case Intrinsic::nvvm_fmin_ftz_f: + case Intrinsic::nvvm_fmin_ftz_nan_f: + case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f: + case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f: + return true; + + case Intrinsic::nvvm_fmax_d: + case Intrinsic::nvvm_fmax_f: + case Intrinsic::nvvm_fmax_nan_f: + case Intrinsic::nvvm_fmax_nan_xorsign_abs_f: + case Intrinsic::nvvm_fmax_xorsign_abs_f: + + case Intrinsic::nvvm_fmin_d: + case Intrinsic::nvvm_fmin_f: + case Intrinsic::nvvm_fmin_nan_f: + case Intrinsic::nvvm_fmin_nan_xorsign_abs_f: + case Intrinsic::nvvm_fmin_xorsign_abs_f: + return false; + } + llvm_unreachable("Checking FTZ flag for invalid fmin/fmax intrinsic"); + return false; +} + +inline bool FMinFMaxPropagatesNaNs(Intrinsic::ID IntrinsicID) { + switch (IntrinsicID) { + case Intrinsic::nvvm_fmax_ftz_nan_f: + case Intrinsic::nvvm_fmax_nan_f: + case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f: + case Intrinsic::nvvm_fmax_nan_xorsign_abs_f: + + case Intrinsic::nvvm_fmin_ftz_nan_f: + case Intrinsic::nvvm_fmin_nan_f: + case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f: + case Intrinsic::nvvm_fmin_nan_xorsign_abs_f: + return true; + + case Intrinsic::nvvm_fmax_d: + case Intrinsic::nvvm_fmax_f: + case Intrinsic::nvvm_fmax_ftz_f: + case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f: + case Intrinsic::nvvm_fmax_xorsign_abs_f: + + case Intrinsic::nvvm_fmin_d: + case Intrinsic::nvvm_fmin_f: + case Intrinsic::nvvm_fmin_ftz_f: + case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f: + case Intrinsic::nvvm_fmin_xorsign_abs_f: + return false; + } + llvm_unreachable("Checking NaN flag for invalid fmin/fmax intrinsic"); + return false; +} + +inline bool FMinFMaxIsXorSignAbs(Intrinsic::ID IntrinsicID) { + switch (IntrinsicID) { + case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f: + case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f: + case Intrinsic::nvvm_fmax_nan_xorsign_abs_f: + case Intrinsic::nvvm_fmax_xorsign_abs_f: + + case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f: + case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f: + case Intrinsic::nvvm_fmin_nan_xorsign_abs_f: + case Intrinsic::nvvm_fmin_xorsign_abs_f: + return true; + + case Intrinsic::nvvm_fmax_d: + case Intrinsic::nvvm_fmax_f: + case Intrinsic::nvvm_fmax_ftz_f: + case Intrinsic::nvvm_fmax_ftz_nan_f: + case Intrinsic::nvvm_fmax_nan_f: + + case Intrinsic::nvvm_fmin_d: + case Intrinsic::nvvm_fmin_f: + case Intrinsic::nvvm_fmin_ftz_f: + case Intrinsic::nvvm_fmin_ftz_nan_f: + case Intrinsic::nvvm_fmin_nan_f: + return false; + } + llvm_unreachable("Checking XorSignAbs flag for invalid fmin/fmax intrinsic"); + return false; +} + } // namespace nvvm } // namespace llvm #endif // LLVM_IR_NVVMINTRINSICUTILS_H diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index ecdc841..3e87ea0 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -1689,6 +1689,28 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) { case Intrinsic::x86_avx512_cvttsd2usi64: return !Call->isStrictFP(); + // NVVM FMax intrinsics + case Intrinsic::nvvm_fmax_d: + case Intrinsic::nvvm_fmax_f: + case Intrinsic::nvvm_fmax_ftz_f: + case Intrinsic::nvvm_fmax_ftz_nan_f: + case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f: + case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f: + case Intrinsic::nvvm_fmax_nan_f: + case Intrinsic::nvvm_fmax_nan_xorsign_abs_f: + case Intrinsic::nvvm_fmax_xorsign_abs_f: + + // NVVM FMin intrinsics + case Intrinsic::nvvm_fmin_d: + case Intrinsic::nvvm_fmin_f: + case Intrinsic::nvvm_fmin_ftz_f: + case Intrinsic::nvvm_fmin_ftz_nan_f: + case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f: + case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f: + case Intrinsic::nvvm_fmin_nan_f: + case Intrinsic::nvvm_fmin_nan_xorsign_abs_f: + case Intrinsic::nvvm_fmin_xorsign_abs_f: + // NVVM float/double to int32/uint32 conversion intrinsics case Intrinsic::nvvm_f2i_rm: case Intrinsic::nvvm_f2i_rn: @@ -2431,9 +2453,10 @@ static Constant *ConstantFoldScalarCall1(StringRef Name, if (U.isNaN()) return ConstantInt::get(Ty, 0); - APFloat::roundingMode RMode = nvvm::IntrinsicGetRoundingMode(IntrinsicID); - bool IsFTZ = nvvm::IntrinsicShouldFTZ(IntrinsicID); - bool IsSigned = nvvm::IntrinsicConvertsToSignedInteger(IntrinsicID); + APFloat::roundingMode RMode = + nvvm::GetFPToIntegerRoundingMode(IntrinsicID); + bool IsFTZ = nvvm::FPToIntegerIntrinsicShouldFTZ(IntrinsicID); + bool IsSigned = nvvm::FPToIntegerIntrinsicResultIsSigned(IntrinsicID); APSInt ResInt(Ty->getIntegerBitWidth(), !IsSigned); auto FloatToRound = IsFTZ ? FTZPreserveSign(U) : U; @@ -2892,12 +2915,49 @@ static Constant *ConstantFoldIntrinsicCall2(Intrinsic::ID IntrinsicID, Type *Ty, case Intrinsic::minnum: case Intrinsic::maximum: case Intrinsic::minimum: + case Intrinsic::nvvm_fmax_d: + case Intrinsic::nvvm_fmin_d: // If one argument is undef, return the other argument. if (IsOp0Undef) return Operands[1]; if (IsOp1Undef) return Operands[0]; break; + + case Intrinsic::nvvm_fmax_f: + case Intrinsic::nvvm_fmax_ftz_f: + case Intrinsic::nvvm_fmax_ftz_nan_f: + case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f: + case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f: + case Intrinsic::nvvm_fmax_nan_f: + case Intrinsic::nvvm_fmax_nan_xorsign_abs_f: + case Intrinsic::nvvm_fmax_xorsign_abs_f: + + case Intrinsic::nvvm_fmin_f: + case Intrinsic::nvvm_fmin_ftz_f: + case Intrinsic::nvvm_fmin_ftz_nan_f: + case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f: + case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f: + case Intrinsic::nvvm_fmin_nan_f: + case Intrinsic::nvvm_fmin_nan_xorsign_abs_f: + case Intrinsic::nvvm_fmin_xorsign_abs_f: + // If one arg is undef, the other arg can be returned only if it is + // constant, as we may need to flush it to sign-preserving zero or + // canonicalize the NaN. + if (!IsOp0Undef && !IsOp1Undef) + break; + if (auto *Op = dyn_cast<ConstantFP>(Operands[IsOp0Undef ? 1 : 0])) { + if (Op->isNaN()) { + APInt NVCanonicalNaN(32, 0x7fffffff); + return ConstantFP::get( + Ty, APFloat(Ty->getFltSemantics(), NVCanonicalNaN)); + } + if (nvvm::FMinFMaxShouldFTZ(IntrinsicID)) + return ConstantFP::get(Ty, FTZPreserveSign(Op->getValueAPF())); + else + return Op; + } + break; } } @@ -2955,6 +3015,79 @@ static Constant *ConstantFoldIntrinsicCall2(Intrinsic::ID IntrinsicID, Type *Ty, return ConstantFP::get(Ty->getContext(), minimum(Op1V, Op2V)); case Intrinsic::maximum: return ConstantFP::get(Ty->getContext(), maximum(Op1V, Op2V)); + + case Intrinsic::nvvm_fmax_d: + case Intrinsic::nvvm_fmax_f: + case Intrinsic::nvvm_fmax_ftz_f: + case Intrinsic::nvvm_fmax_ftz_nan_f: + case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f: + case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f: + case Intrinsic::nvvm_fmax_nan_f: + case Intrinsic::nvvm_fmax_nan_xorsign_abs_f: + case Intrinsic::nvvm_fmax_xorsign_abs_f: + + case Intrinsic::nvvm_fmin_d: + case Intrinsic::nvvm_fmin_f: + case Intrinsic::nvvm_fmin_ftz_f: + case Intrinsic::nvvm_fmin_ftz_nan_f: + case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f: + case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f: + case Intrinsic::nvvm_fmin_nan_f: + case Intrinsic::nvvm_fmin_nan_xorsign_abs_f: + case Intrinsic::nvvm_fmin_xorsign_abs_f: { + + bool ShouldCanonicalizeNaNs = !(IntrinsicID == Intrinsic::nvvm_fmax_d || + IntrinsicID == Intrinsic::nvvm_fmin_d); + bool IsFTZ = nvvm::FMinFMaxShouldFTZ(IntrinsicID); + bool IsNaNPropagating = nvvm::FMinFMaxPropagatesNaNs(IntrinsicID); + bool IsXorSignAbs = nvvm::FMinFMaxIsXorSignAbs(IntrinsicID); + + APFloat A = IsFTZ ? FTZPreserveSign(Op1V) : Op1V; + APFloat B = IsFTZ ? FTZPreserveSign(Op2V) : Op2V; + + bool XorSign = false; + if (IsXorSignAbs) { + XorSign = A.isNegative() ^ B.isNegative(); + A = abs(A); + B = abs(B); + } + + bool IsFMax = false; + switch (IntrinsicID) { + case Intrinsic::nvvm_fmax_d: + case Intrinsic::nvvm_fmax_f: + case Intrinsic::nvvm_fmax_ftz_f: + case Intrinsic::nvvm_fmax_ftz_nan_f: + case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f: + case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f: + case Intrinsic::nvvm_fmax_nan_f: + case Intrinsic::nvvm_fmax_nan_xorsign_abs_f: + case Intrinsic::nvvm_fmax_xorsign_abs_f: + IsFMax = true; + break; + } + APFloat Res = IsFMax ? maximum(A, B) : minimum(A, B); + + if (ShouldCanonicalizeNaNs) { + APFloat NVCanonicalNaN(Res.getSemantics(), APInt(32, 0x7fffffff)); + if (A.isNaN() && B.isNaN()) + return ConstantFP::get(Ty, NVCanonicalNaN); + else if (IsNaNPropagating && (A.isNaN() || B.isNaN())) + return ConstantFP::get(Ty, NVCanonicalNaN); + } + + if (A.isNaN() && B.isNaN()) + return Operands[1]; + else if (A.isNaN()) + Res = B; + else if (B.isNaN()) + Res = A; + + if (IsXorSignAbs && XorSign != Res.isNegative()) + Res.changeSign(); + + return ConstantFP::get(Ty->getContext(), Res); + } } if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy()) diff --git a/llvm/test/Transforms/InstSimplify/const-fold-nvvm-fmin-fmax.ll b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-fmin-fmax.ll new file mode 100644 index 0000000..4ab6b3c --- /dev/null +++ b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-fmin-fmax.ll @@ -0,0 +1,918 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -passes=instsimplify -march=nvptx64 --mcpu=sm_86 --mattr=+ptx72 -S | FileCheck %s + +; Check constant-folding for NVVM fmin fmax intrinsics + +;############################################################### +;# FMax(1.25, -2.0) # +;############################################################### + +define double @test_fmax_1_25_neg_2_d() { +; CHECK-LABEL: define double @test_fmax_1_25_neg_2_d() { +; CHECK-NEXT: ret double 1.250000e+00 +; + %res = call double @llvm.nvvm.fmax.d(double 1.25, double -2.0) + ret double %res +} + +define float @test_fmax_1_25_neg_2_f() { +; CHECK-LABEL: define float @test_fmax_1_25_neg_2_f() { +; CHECK-NEXT: ret float 1.250000e+00 +; + %res = call float @llvm.nvvm.fmax.f(float 1.25, float -2.0) + ret float %res +} + +define float @test_fmax_1_25_neg_2_ftz_f() { +; CHECK-LABEL: define float @test_fmax_1_25_neg_2_ftz_f() { +; CHECK-NEXT: ret float 1.250000e+00 +; + %res = call float @llvm.nvvm.fmax.ftz.f(float 1.25, float -2.0) + ret float %res +} + +define float @test_fmax_1_25_neg_2_ftz_nan_f() { +; CHECK-LABEL: define float @test_fmax_1_25_neg_2_ftz_nan_f() { +; CHECK-NEXT: ret float 1.250000e+00 +; + %res = call float @llvm.nvvm.fmax.ftz.nan.f(float 1.25, float -2.0) + ret float %res +} + +define float @test_fmax_1_25_neg_2_ftz_nan_xorsign_abs_f() { +; CHECK-LABEL: define float @test_fmax_1_25_neg_2_ftz_nan_xorsign_abs_f() { +; CHECK-NEXT: ret float -2.000000e+00 +; + %res = call float @llvm.nvvm.fmax.ftz.nan.xorsign.abs.f(float 1.25, float -2.0) + ret float %res +} + +define float @test_fmax_1_25_neg_2_ftz_xorsign_abs_f() { +; CHECK-LABEL: define float @test_fmax_1_25_neg_2_ftz_xorsign_abs_f() { +; CHECK-NEXT: ret float -2.000000e+00 +; + %res = call float @llvm.nvvm.fmax.ftz.xorsign.abs.f(float 1.25, float -2.0) + ret float %res +} + +define float @test_fmax_1_25_neg_2_nan_f() { +; CHECK-LABEL: define float @test_fmax_1_25_neg_2_nan_f() { +; CHECK-NEXT: ret float 1.250000e+00 +; + %res = call float @llvm.nvvm.fmax.nan.f(float 1.25, float -2.0) + ret float %res +} + +define float @test_fmax_1_25_neg_2_nan_xorsign_abs_f() { +; CHECK-LABEL: define float @test_fmax_1_25_neg_2_nan_xorsign_abs_f() { +; CHECK-NEXT: ret float -2.000000e+00 +; + %res = call float @llvm.nvvm.fmax.nan.xorsign.abs.f(float 1.25, float -2.0) + ret float %res +} + +define float @test_fmax_1_25_neg_2_xorsign_abs_f() { +; CHECK-LABEL: define float @test_fmax_1_25_neg_2_xorsign_abs_f() { +; CHECK-NEXT: ret float -2.000000e+00 +; + %res = call float @llvm.nvvm.fmax.xorsign.abs.f(float 1.25, float -2.0) + ret float %res +} + +;############################################################### +;# FMax(+Subnormal, 0.0) # +;############################################################### + +define double @test_fmax_pos_subnorm_zero_d() { +; CHECK-LABEL: define double @test_fmax_pos_subnorm_zero_d() { +; CHECK-NEXT: ret double 0x380FFFFFC0000000 +; + %res = call double @llvm.nvvm.fmax.d(double 0x380FFFFFC0000000, double 0.0) + ret double %res +} + +define float @test_fmax_pos_subnorm_zero_f() { +; CHECK-LABEL: define float @test_fmax_pos_subnorm_zero_f() { +; CHECK-NEXT: ret float 0x380FFFFFC0000000 +; + %res = call float @llvm.nvvm.fmax.f(float 0x380FFFFFC0000000, float 0.0) + ret float %res +} + +define float @test_fmax_pos_subnorm_zero_ftz_f() { +; CHECK-LABEL: define float @test_fmax_pos_subnorm_zero_ftz_f() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.fmax.ftz.f(float 0x380FFFFFC0000000, float 0.0) + ret float %res +} + +define float @test_fmax_pos_subnorm_zero_ftz_nan_f() { +; CHECK-LABEL: define float @test_fmax_pos_subnorm_zero_ftz_nan_f() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.fmax.ftz.nan.f(float 0x380FFFFFC0000000, float 0.0) + ret float %res +} + +define float @test_fmax_pos_subnorm_zero_ftz_nan_xorsign_abs_f() { +; CHECK-LABEL: define float @test_fmax_pos_subnorm_zero_ftz_nan_xorsign_abs_f() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.fmax.ftz.nan.xorsign.abs.f(float 0x380FFFFFC0000000, float 0.0) + ret float %res +} + +define float @test_fmax_pos_subnorm_zero_ftz_xorsign_abs_f() { +; CHECK-LABEL: define float @test_fmax_pos_subnorm_zero_ftz_xorsign_abs_f() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.fmax.ftz.xorsign.abs.f(float 0x380FFFFFC0000000, float 0.0) + ret float %res +} + +define float @test_fmax_pos_subnorm_zero_nan_f() { +; CHECK-LABEL: define float @test_fmax_pos_subnorm_zero_nan_f() { +; CHECK-NEXT: ret float 0x380FFFFFC0000000 +; + %res = call float @llvm.nvvm.fmax.nan.f(float 0x380FFFFFC0000000, float 0.0) + ret float %res +} + +define float @test_fmax_pos_subnorm_zero_nan_xorsign_abs_f() { +; CHECK-LABEL: define float @test_fmax_pos_subnorm_zero_nan_xorsign_abs_f() { +; CHECK-NEXT: ret float 0x380FFFFFC0000000 +; + %res = call float @llvm.nvvm.fmax.nan.xorsign.abs.f(float 0x380FFFFFC0000000, float 0.0) + ret float %res +} + +define float @test_fmax_pos_subnorm_zero_xorsign_abs_f() { +; CHECK-LABEL: define float @test_fmax_pos_subnorm_zero_xorsign_abs_f() { +; CHECK-NEXT: ret float 0x380FFFFFC0000000 +; + %res = call float @llvm.nvvm.fmax.xorsign.abs.f(float 0x380FFFFFC0000000, float 0.0) + ret float %res +} + +;############################################################### +;# FMax(+Subnormal, -Subnormal) # +;############################################################### + +define double @test_fmax_pos_subnorm_neg_subnorm_d() { +; CHECK-LABEL: define double @test_fmax_pos_subnorm_neg_subnorm_d() { +; CHECK-NEXT: ret double 0x380FFFFFC0000000 +; + %res = call double @llvm.nvvm.fmax.d(double 0x380FFFFFC0000000, double 0xB80FFFFFC0000000) + ret double %res +} + +define float @test_fmax_pos_subnorm_neg_subnorm_f() { +; CHECK-LABEL: define float @test_fmax_pos_subnorm_neg_subnorm_f() { +; CHECK-NEXT: ret float 0x380FFFFFC0000000 +; + %res = call float @llvm.nvvm.fmax.f(float 0x380FFFFFC0000000, float 0xB80FFFFFC0000000) + ret float %res +} + +define float @test_fmax_pos_subnorm_neg_subnorm_ftz_f() { +; CHECK-LABEL: define float @test_fmax_pos_subnorm_neg_subnorm_ftz_f() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.fmax.ftz.f(float 0x380FFFFFC0000000, float 0xB80FFFFFC0000000) + ret float %res +} + +define float @test_fmax_pos_subnorm_neg_subnorm_ftz_nan_f() { +; CHECK-LABEL: define float @test_fmax_pos_subnorm_neg_subnorm_ftz_nan_f() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.fmax.ftz.nan.f(float 0x380FFFFFC0000000, float 0xB80FFFFFC0000000) + ret float %res +} + +define float @test_fmax_pos_subnorm_neg_subnorm_ftz_nan_xorsign_abs_f() { +; CHECK-LABEL: define float @test_fmax_pos_subnorm_neg_subnorm_ftz_nan_xorsign_abs_f() { +; CHECK-NEXT: ret float -0.000000e+00 +; + %res = call float @llvm.nvvm.fmax.ftz.nan.xorsign.abs.f(float 0x380FFFFFC0000000, float 0xB80FFFFFC0000000) + ret float %res +} + +define float @test_fmax_pos_subnorm_neg_subnorm_ftz_xorsign_abs_f() { +; CHECK-LABEL: define float @test_fmax_pos_subnorm_neg_subnorm_ftz_xorsign_abs_f() { +; CHECK-NEXT: ret float -0.000000e+00 +; + %res = call float @llvm.nvvm.fmax.ftz.xorsign.abs.f(float 0x380FFFFFC0000000, float 0xB80FFFFFC0000000) + ret float %res +} + +define float @test_fmax_pos_subnorm_neg_subnorm_nan_f() { +; CHECK-LABEL: define float @test_fmax_pos_subnorm_neg_subnorm_nan_f() { +; CHECK-NEXT: ret float 0x380FFFFFC0000000 +; + %res = call float @llvm.nvvm.fmax.nan.f(float 0x380FFFFFC0000000, float 0xB80FFFFFC0000000) + ret float %res +} + +define float @test_fmax_pos_subnorm_neg_subnorm_nan_xorsign_abs_f() { +; CHECK-LABEL: define float @test_fmax_pos_subnorm_neg_subnorm_nan_xorsign_abs_f() { +; CHECK-NEXT: ret float 0xB80FFFFFC0000000 +; + %res = call float @llvm.nvvm.fmax.nan.xorsign.abs.f(float 0x380FFFFFC0000000, float 0xB80FFFFFC0000000) + ret float %res +} + +define float @test_fmax_pos_subnorm_neg_subnorm_xorsign_abs_f() { +; CHECK-LABEL: define float @test_fmax_pos_subnorm_neg_subnorm_xorsign_abs_f() { +; CHECK-NEXT: ret float 0xB80FFFFFC0000000 +; + %res = call float @llvm.nvvm.fmax.xorsign.abs.f(float 0x380FFFFFC0000000, float 0xB80FFFFFC0000000) + ret float %res +} + +;############################################################### +;# FMax(+Subnormal, NaN) # +;############################################################### + +define double @test_fmax_pos_subnorm_nan_d() { +; CHECK-LABEL: define double @test_fmax_pos_subnorm_nan_d() { +; CHECK-NEXT: ret double 0x380FFFFFC0000000 +; + %res = call double @llvm.nvvm.fmax.d(double 0x380FFFFFC0000000, double 0x7fff444400000000) + ret double %res +} + +define float @test_fmax_pos_subnorm_nan_f() { +; CHECK-LABEL: define float @test_fmax_pos_subnorm_nan_f() { +; CHECK-NEXT: ret float 0x380FFFFFC0000000 +; + %res = call float @llvm.nvvm.fmax.f(float 0x380FFFFFC0000000, float 0x7fff444400000000) + ret float %res +} + +define float @test_fmax_pos_subnorm_nan_ftz_f() { +; CHECK-LABEL: define float @test_fmax_pos_subnorm_nan_ftz_f() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.fmax.ftz.f(float 0x380FFFFFC0000000, float 0x7fff444400000000) + ret float %res +} + +define float @test_fmax_pos_subnorm_nan_ftz_nan_f() { +; CHECK-LABEL: define float @test_fmax_pos_subnorm_nan_ftz_nan_f() { +; CHECK-NEXT: ret float 0x7FFFFFFFE0000000 +; + %res = call float @llvm.nvvm.fmax.ftz.nan.f(float 0x380FFFFFC0000000, float 0x7fff444400000000) + ret float %res +} + +define float @test_fmax_pos_subnorm_nan_ftz_nan_xorsign_abs_f() { +; CHECK-LABEL: define float @test_fmax_pos_subnorm_nan_ftz_nan_xorsign_abs_f() { +; CHECK-NEXT: ret float 0x7FFFFFFFE0000000 +; + %res = call float @llvm.nvvm.fmax.ftz.nan.xorsign.abs.f(float 0x380FFFFFC0000000, float 0x7fff444400000000) + ret float %res +} + +define float @test_fmax_pos_subnorm_nan_ftz_xorsign_abs_f() { +; CHECK-LABEL: define float @test_fmax_pos_subnorm_nan_ftz_xorsign_abs_f() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.fmax.ftz.xorsign.abs.f(float 0x380FFFFFC0000000, float 0x7fff444400000000) + ret float %res +} + +define float @test_fmax_pos_subnorm_nan_nan_f() { +; CHECK-LABEL: define float @test_fmax_pos_subnorm_nan_nan_f() { +; CHECK-NEXT: ret float 0x7FFFFFFFE0000000 +; + %res = call float @llvm.nvvm.fmax.nan.f(float 0x380FFFFFC0000000, float 0x7fff444400000000) + ret float %res +} + +define float @test_fmax_pos_subnorm_nan_nan_xorsign_abs_f() { +; CHECK-LABEL: define float @test_fmax_pos_subnorm_nan_nan_xorsign_abs_f() { +; CHECK-NEXT: ret float 0x7FFFFFFFE0000000 +; + %res = call float @llvm.nvvm.fmax.nan.xorsign.abs.f(float 0x380FFFFFC0000000, float 0x7fff444400000000) + ret float %res +} + +define float @test_fmax_pos_subnorm_nan_xorsign_abs_f() { +; CHECK-LABEL: define float @test_fmax_pos_subnorm_nan_xorsign_abs_f() { +; CHECK-NEXT: ret float 0x380FFFFFC0000000 +; + %res = call float @llvm.nvvm.fmax.xorsign.abs.f(float 0x380FFFFFC0000000, float 0x7fff444400000000) + ret float %res +} + +;############################################################### +;# FMax(+Subnormal, undef) # +;############################################################### + +define double @test_fmax_subnorm_undef_d() { +; CHECK-LABEL: define double @test_fmax_subnorm_undef_d() { +; CHECK-NEXT: ret double 0x380FFFFFC0000000 +; + %res = call double @llvm.nvvm.fmax.d(double 0x380FFFFFC0000000, double undef) + ret double %res +} + +define float @test_fmax_subnorm_undef_f() { +; CHECK-LABEL: define float @test_fmax_subnorm_undef_f() { +; CHECK-NEXT: ret float 0x380FFFFFC0000000 +; + %res = call float @llvm.nvvm.fmax.f(float 0x380FFFFFC0000000, float undef) + ret float %res +} + +define float @test_fmax_subnorm_undef_ftz_f() { +; CHECK-LABEL: define float @test_fmax_subnorm_undef_ftz_f() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.fmax.ftz.f(float 0x380FFFFFC0000000, float undef) + ret float %res +} + +define float @test_fmax_subnorm_undef_ftz_nan_f() { +; CHECK-LABEL: define float @test_fmax_subnorm_undef_ftz_nan_f() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.fmax.ftz.nan.f(float 0x380FFFFFC0000000, float undef) + ret float %res +} + +define float @test_fmax_subnorm_undef_ftz_nan_xorsign_abs_f() { +; CHECK-LABEL: define float @test_fmax_subnorm_undef_ftz_nan_xorsign_abs_f() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.fmax.ftz.nan.xorsign.abs.f(float 0x380FFFFFC0000000, float undef) + ret float %res +} + +define float @test_fmax_subnorm_undef_ftz_xorsign_abs_f() { +; CHECK-LABEL: define float @test_fmax_subnorm_undef_ftz_xorsign_abs_f() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.fmax.ftz.xorsign.abs.f(float 0x380FFFFFC0000000, float undef) + ret float %res +} + +define float @test_fmax_subnorm_undef_nan_f() { +; CHECK-LABEL: define float @test_fmax_subnorm_undef_nan_f() { +; CHECK-NEXT: ret float 0x380FFFFFC0000000 +; + %res = call float @llvm.nvvm.fmax.nan.f(float 0x380FFFFFC0000000, float undef) + ret float %res +} + +define float @test_fmax_subnorm_undef_nan_xorsign_abs_f() { +; CHECK-LABEL: define float @test_fmax_subnorm_undef_nan_xorsign_abs_f() { +; CHECK-NEXT: ret float 0x380FFFFFC0000000 +; + %res = call float @llvm.nvvm.fmax.nan.xorsign.abs.f(float 0x380FFFFFC0000000, float undef) + ret float %res +} + +define float @test_fmax_subnorm_undef_xorsign_abs_f() { +; CHECK-LABEL: define float @test_fmax_subnorm_undef_xorsign_abs_f() { +; CHECK-NEXT: ret float 0x380FFFFFC0000000 +; + %res = call float @llvm.nvvm.fmax.xorsign.abs.f(float 0x380FFFFFC0000000, float undef) + ret float %res +} + +;############################################################### +;# FMax(NaN, undef) # +;############################################################### +; Ensure we canonicalize the NaNs for f32 + +define double @test_fmax_nan_undef_d() { +; CHECK-LABEL: define double @test_fmax_nan_undef_d() { +; CHECK-NEXT: ret double 0x7FF4444400000000 +; + %res = call double @llvm.nvvm.fmax.d(double 0x7ff4444400000000, double undef) + ret double %res +} + +define float @test_fmax_nan_undef_f() { +; CHECK-LABEL: define float @test_fmax_nan_undef_f() { +; CHECK-NEXT: ret float 0x7FFFFFFFE0000000 +; + %res = call float @llvm.nvvm.fmax.f(float 0x7fff444400000000, float undef) + ret float %res +} + +define float @test_fmax_nan_undef_ftz_f() { +; CHECK-LABEL: define float @test_fmax_nan_undef_ftz_f() { +; CHECK-NEXT: ret float 0x7FFFFFFFE0000000 +; + %res = call float @llvm.nvvm.fmax.ftz.f(float 0x7fff444400000000, float undef) + ret float %res +} + +define float @test_fmax_nan_undef_ftz_nan_f() { +; CHECK-LABEL: define float @test_fmax_nan_undef_ftz_nan_f() { +; CHECK-NEXT: ret float 0x7FFFFFFFE0000000 +; + %res = call float @llvm.nvvm.fmax.ftz.nan.f(float 0x7fff444400000000, float undef) + ret float %res +} + +define float @test_fmax_nan_undef_ftz_nan_xorsign_abs_f() { +; CHECK-LABEL: define float @test_fmax_nan_undef_ftz_nan_xorsign_abs_f() { +; CHECK-NEXT: ret float 0x7FFFFFFFE0000000 +; + %res = call float @llvm.nvvm.fmax.ftz.nan.xorsign.abs.f(float 0x7fff444400000000, float undef) + ret float %res +} + +define float @test_fmax_nan_undef_ftz_xorsign_abs_f() { +; CHECK-LABEL: define float @test_fmax_nan_undef_ftz_xorsign_abs_f() { +; CHECK-NEXT: ret float 0x7FFFFFFFE0000000 +; + %res = call float @llvm.nvvm.fmax.ftz.xorsign.abs.f(float 0x7ffff4ff00000000, float undef) + ret float %res +} + +define float @test_fmax_nan_undef_nan_f() { +; CHECK-LABEL: define float @test_fmax_nan_undef_nan_f() { +; CHECK-NEXT: ret float 0x7FFFFFFFE0000000 +; + %res = call float @llvm.nvvm.fmax.nan.f(float 0x7fff444400000000, float undef) + ret float %res +} + +define float @test_fmax_nan_undef_nan_xorsign_abs_f() { +; CHECK-LABEL: define float @test_fmax_nan_undef_nan_xorsign_abs_f() { +; CHECK-NEXT: ret float 0x7FFFFFFFE0000000 +; + %res = call float @llvm.nvvm.fmax.nan.xorsign.abs.f(float 0x7fff444400000000, float undef) + ret float %res +} + +define float @test_fmax_nan_undef_xorsign_abs_f() { +; CHECK-LABEL: define float @test_fmax_nan_undef_xorsign_abs_f() { +; CHECK-NEXT: ret float 0x7FFFFFFFE0000000 +; + %res = call float @llvm.nvvm.fmax.xorsign.abs.f(float 0x7fff444400000000, float undef) + ret float %res +} + +;############################################################### +;# FMin(1.25, -2.0) # +;############################################################### + +define double @test_fmin_1_25_neg_2_d() { +; CHECK-LABEL: define double @test_fmin_1_25_neg_2_d() { +; CHECK-NEXT: ret double -2.000000e+00 +; + %res = call double @llvm.nvvm.fmin.d(double 1.25, double -2.0) + ret double %res +} + +define float @test_fmin_1_25_neg_2_f() { +; CHECK-LABEL: define float @test_fmin_1_25_neg_2_f() { +; CHECK-NEXT: ret float -2.000000e+00 +; + %res = call float @llvm.nvvm.fmin.f(float 1.25, float -2.0) + ret float %res +} + +define float @test_fmin_1_25_neg_2_ftz_f() { +; CHECK-LABEL: define float @test_fmin_1_25_neg_2_ftz_f() { +; CHECK-NEXT: ret float -2.000000e+00 +; + %res = call float @llvm.nvvm.fmin.ftz.f(float 1.25, float -2.0) + ret float %res +} + +define float @test_fmin_1_25_neg_2_ftz_nan_f() { +; CHECK-LABEL: define float @test_fmin_1_25_neg_2_ftz_nan_f() { +; CHECK-NEXT: ret float -2.000000e+00 +; + %res = call float @llvm.nvvm.fmin.ftz.nan.f(float 1.25, float -2.0) + ret float %res +} + +define float @test_fmin_1_25_neg_2_ftz_nan_xorsign_abs_f() { +; CHECK-LABEL: define float @test_fmin_1_25_neg_2_ftz_nan_xorsign_abs_f() { +; CHECK-NEXT: ret float -1.250000e+00 +; + %res = call float @llvm.nvvm.fmin.ftz.nan.xorsign.abs.f(float 1.25, float -2.0) + ret float %res +} + +define float @test_fmin_1_25_neg_2_ftz_xorsign_abs_f() { +; CHECK-LABEL: define float @test_fmin_1_25_neg_2_ftz_xorsign_abs_f() { +; CHECK-NEXT: ret float -1.250000e+00 +; + %res = call float @llvm.nvvm.fmin.ftz.xorsign.abs.f(float 1.25, float -2.0) + ret float %res +} + +define float @test_fmin_1_25_neg_2_nan_f() { +; CHECK-LABEL: define float @test_fmin_1_25_neg_2_nan_f() { +; CHECK-NEXT: ret float -2.000000e+00 +; + %res = call float @llvm.nvvm.fmin.nan.f(float 1.25, float -2.0) + ret float %res +} + +define float @test_fmin_1_25_neg_2_nan_xorsign_abs_f() { +; CHECK-LABEL: define float @test_fmin_1_25_neg_2_nan_xorsign_abs_f() { +; CHECK-NEXT: ret float -1.250000e+00 +; + %res = call float @llvm.nvvm.fmin.nan.xorsign.abs.f(float 1.25, float -2.0) + ret float %res +} + +define float @test_fmin_1_25_neg_2_xorsign_abs_f() { +; CHECK-LABEL: define float @test_fmin_1_25_neg_2_xorsign_abs_f() { +; CHECK-NEXT: ret float -1.250000e+00 +; + %res = call float @llvm.nvvm.fmin.xorsign.abs.f(float 1.25, float -2.0) + ret float %res +} + +;############################################################### +;# FMin(-Subnormal, 0.0) # +;############################################################### + +define double @test_fmin_neg_subnorm_zero_d() { +; CHECK-LABEL: define double @test_fmin_neg_subnorm_zero_d() { +; CHECK-NEXT: ret double 0xB80FFFFFC0000000 +; + %res = call double @llvm.nvvm.fmin.d(double 0xB80FFFFFC0000000, double 0.0) + ret double %res +} + +define float @test_fmin_neg_subnorm_zero_f() { +; CHECK-LABEL: define float @test_fmin_neg_subnorm_zero_f() { +; CHECK-NEXT: ret float 0xB80FFFFFC0000000 +; + %res = call float @llvm.nvvm.fmin.f(float 0xB80FFFFFC0000000, float 0.0) + ret float %res +} + +define float @test_fmin_neg_subnorm_zero_ftz_f() { +; CHECK-LABEL: define float @test_fmin_neg_subnorm_zero_ftz_f() { +; CHECK-NEXT: ret float -0.000000e+00 +; + %res = call float @llvm.nvvm.fmin.ftz.f(float 0xB80FFFFFC0000000, float 0.0) + ret float %res +} + +define float @test_fmin_neg_subnorm_zero_ftz_nan_f() { +; CHECK-LABEL: define float @test_fmin_neg_subnorm_zero_ftz_nan_f() { +; CHECK-NEXT: ret float -0.000000e+00 +; + %res = call float @llvm.nvvm.fmin.ftz.nan.f(float 0xB80FFFFFC0000000, float 0.0) + ret float %res +} + +define float @test_fmin_neg_subnorm_zero_ftz_nan_xorsign_abs_f() { +; CHECK-LABEL: define float @test_fmin_neg_subnorm_zero_ftz_nan_xorsign_abs_f() { +; CHECK-NEXT: ret float -0.000000e+00 +; + %res = call float @llvm.nvvm.fmin.ftz.nan.xorsign.abs.f(float 0xB80FFFFFC0000000, float 0.0) + ret float %res +} + +define float @test_fmin_neg_subnorm_zero_ftz_xorsign_abs_f() { +; CHECK-LABEL: define float @test_fmin_neg_subnorm_zero_ftz_xorsign_abs_f() { +; CHECK-NEXT: ret float -0.000000e+00 +; + %res = call float @llvm.nvvm.fmin.ftz.xorsign.abs.f(float 0xB80FFFFFC0000000, float 0.0) + ret float %res +} + +define float @test_fmin_neg_subnorm_zero_nan_f() { +; CHECK-LABEL: define float @test_fmin_neg_subnorm_zero_nan_f() { +; CHECK-NEXT: ret float 0xB80FFFFFC0000000 +; + %res = call float @llvm.nvvm.fmin.nan.f(float 0xB80FFFFFC0000000, float 0.0) + ret float %res +} + +define float @test_fmin_neg_subnorm_zero_nan_xorsign_abs_f() { +; CHECK-LABEL: define float @test_fmin_neg_subnorm_zero_nan_xorsign_abs_f() { +; CHECK-NEXT: ret float -0.000000e+00 +; + %res = call float @llvm.nvvm.fmin.nan.xorsign.abs.f(float 0xB80FFFFFC0000000, float 0.0) + ret float %res +} + +define float @test_fmin_neg_subnorm_zero_xorsign_abs_f() { +; CHECK-LABEL: define float @test_fmin_neg_subnorm_zero_xorsign_abs_f() { +; CHECK-NEXT: ret float -0.000000e+00 +; + %res = call float @llvm.nvvm.fmin.xorsign.abs.f(float 0xB80FFFFFC0000000, float 0.0) + ret float %res +} + +;############################################################### +;# FMin(+Subnormal, -Subnormal) # +;############################################################### + +define double @test_fmin_pos_subnorm_neg_subnorm_d() { +; CHECK-LABEL: define double @test_fmin_pos_subnorm_neg_subnorm_d() { +; CHECK-NEXT: ret double 0xB80FFFFFC0000000 +; + %res = call double @llvm.nvvm.fmin.d(double 0x380FFFFFC0000000, double 0xB80FFFFFC0000000) + ret double %res +} + +define float @test_fmin_pos_subnorm_neg_subnorm_f() { +; CHECK-LABEL: define float @test_fmin_pos_subnorm_neg_subnorm_f() { +; CHECK-NEXT: ret float 0xB80FFFFFC0000000 +; + %res = call float @llvm.nvvm.fmin.f(float 0x380FFFFFC0000000, float 0xB80FFFFFC0000000) + ret float %res +} + +define float @test_fmin_pos_subnorm_neg_subnorm_ftz_f() { +; CHECK-LABEL: define float @test_fmin_pos_subnorm_neg_subnorm_ftz_f() { +; CHECK-NEXT: ret float -0.000000e+00 +; + %res = call float @llvm.nvvm.fmin.ftz.f(float 0x380FFFFFC0000000, float 0xB80FFFFFC0000000) + ret float %res +} + +define float @test_fmin_pos_subnorm_neg_subnorm_ftz_nan_f() { +; CHECK-LABEL: define float @test_fmin_pos_subnorm_neg_subnorm_ftz_nan_f() { +; CHECK-NEXT: ret float -0.000000e+00 +; + %res = call float @llvm.nvvm.fmin.ftz.nan.f(float 0x380FFFFFC0000000, float 0xB80FFFFFC0000000) + ret float %res +} + +define float @test_fmin_pos_subnorm_neg_subnorm_ftz_nan_xorsign_abs_f() { +; CHECK-LABEL: define float @test_fmin_pos_subnorm_neg_subnorm_ftz_nan_xorsign_abs_f() { +; CHECK-NEXT: ret float -0.000000e+00 +; + %res = call float @llvm.nvvm.fmin.ftz.nan.xorsign.abs.f(float 0x380FFFFFC0000000, float 0xB80FFFFFC0000000) + ret float %res +} + +define float @test_fmin_pos_subnorm_neg_subnorm_ftz_xorsign_abs_f() { +; CHECK-LABEL: define float @test_fmin_pos_subnorm_neg_subnorm_ftz_xorsign_abs_f() { +; CHECK-NEXT: ret float -0.000000e+00 +; + %res = call float @llvm.nvvm.fmin.ftz.xorsign.abs.f(float 0x380FFFFFC0000000, float 0xB80FFFFFC0000000) + ret float %res +} + +define float @test_fmin_pos_subnorm_neg_subnorm_nan_f() { +; CHECK-LABEL: define float @test_fmin_pos_subnorm_neg_subnorm_nan_f() { +; CHECK-NEXT: ret float 0xB80FFFFFC0000000 +; + %res = call float @llvm.nvvm.fmin.nan.f(float 0x380FFFFFC0000000, float 0xB80FFFFFC0000000) + ret float %res +} + +define float @test_fmin_pos_subnorm_neg_subnorm_nan_xorsign_abs_f() { +; CHECK-LABEL: define float @test_fmin_pos_subnorm_neg_subnorm_nan_xorsign_abs_f() { +; CHECK-NEXT: ret float 0xB80FFFFFC0000000 +; + %res = call float @llvm.nvvm.fmin.nan.xorsign.abs.f(float 0x380FFFFFC0000000, float 0xB80FFFFFC0000000) + ret float %res +} + +define float @test_fmin_pos_subnorm_neg_subnorm_xorsign_abs_f() { +; CHECK-LABEL: define float @test_fmin_pos_subnorm_neg_subnorm_xorsign_abs_f() { +; CHECK-NEXT: ret float 0xB80FFFFFC0000000 +; + %res = call float @llvm.nvvm.fmin.xorsign.abs.f(float 0x380FFFFFC0000000, float 0xB80FFFFFC0000000) + ret float %res +} + +;############################################################### +;# FMin(+Subnormal, NaN) # +;############################################################### + +define double @test_fmin_pos_subnorm_nan_d() { +; CHECK-LABEL: define double @test_fmin_pos_subnorm_nan_d() { +; CHECK-NEXT: ret double 0x380FFFFFC0000000 +; + %res = call double @llvm.nvvm.fmin.d(double 0x380FFFFFC0000000, double 0x7fff444400000000) + ret double %res +} + +define float @test_fmin_pos_subnorm_nan_f() { +; CHECK-LABEL: define float @test_fmin_pos_subnorm_nan_f() { +; CHECK-NEXT: ret float 0x380FFFFFC0000000 +; + %res = call float @llvm.nvvm.fmin.f(float 0x380FFFFFC0000000, float 0x7fff444400000000) + ret float %res +} + +define float @test_fmin_pos_subnorm_nan_ftz_f() { +; CHECK-LABEL: define float @test_fmin_pos_subnorm_nan_ftz_f() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.fmin.ftz.f(float 0x380FFFFFC0000000, float 0x7fff444400000000) + ret float %res +} + +define float @test_fmin_pos_subnorm_nan_ftz_nan_f() { +; CHECK-LABEL: define float @test_fmin_pos_subnorm_nan_ftz_nan_f() { +; CHECK-NEXT: ret float 0x7FFFFFFFE0000000 +; + %res = call float @llvm.nvvm.fmin.ftz.nan.f(float 0x380FFFFFC0000000, float 0x7fff444400000000) + ret float %res +} + +define float @test_fmin_pos_subnorm_nan_ftz_nan_xorsign_abs_f() { +; CHECK-LABEL: define float @test_fmin_pos_subnorm_nan_ftz_nan_xorsign_abs_f() { +; CHECK-NEXT: ret float 0x7FFFFFFFE0000000 +; + %res = call float @llvm.nvvm.fmin.ftz.nan.xorsign.abs.f(float 0x380FFFFFC0000000, float 0x7fff444400000000) + ret float %res +} + +define float @test_fmin_pos_subnorm_nan_ftz_xorsign_abs_f() { +; CHECK-LABEL: define float @test_fmin_pos_subnorm_nan_ftz_xorsign_abs_f() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.fmin.ftz.xorsign.abs.f(float 0x380FFFFFC0000000, float 0x7fff444400000000) + ret float %res +} + +define float @test_fmin_pos_subnorm_nan_nan_f() { +; CHECK-LABEL: define float @test_fmin_pos_subnorm_nan_nan_f() { +; CHECK-NEXT: ret float 0x7FFFFFFFE0000000 +; + %res = call float @llvm.nvvm.fmin.nan.f(float 0x380FFFFFC0000000, float 0x7fff444400000000) + ret float %res +} + +define float @test_fmin_pos_subnorm_nan_nan_xorsign_abs_f() { +; CHECK-LABEL: define float @test_fmin_pos_subnorm_nan_nan_xorsign_abs_f() { +; CHECK-NEXT: ret float 0x7FFFFFFFE0000000 +; + %res = call float @llvm.nvvm.fmin.nan.xorsign.abs.f(float 0x380FFFFFC0000000, float 0x7fff444400000000) + ret float %res +} + +define float @test_fmin_pos_subnorm_nan_xorsign_abs_f() { +; CHECK-LABEL: define float @test_fmin_pos_subnorm_nan_xorsign_abs_f() { +; CHECK-NEXT: ret float 0x380FFFFFC0000000 +; + %res = call float @llvm.nvvm.fmin.xorsign.abs.f(float 0x380FFFFFC0000000, float 0x7fff444400000000) + ret float %res +} + +;############################################################### +;# FMin(+Subnormal, undef) # +;############################################################### + +define double @test_fmin_subnorm_undef_d() { +; CHECK-LABEL: define double @test_fmin_subnorm_undef_d() { +; CHECK-NEXT: ret double 0x380FFFFFC0000000 +; + %res = call double @llvm.nvvm.fmin.d(double 0x380FFFFFC0000000, double undef) + ret double %res +} + +define float @test_fmin_subnorm_undef_f() { +; CHECK-LABEL: define float @test_fmin_subnorm_undef_f() { +; CHECK-NEXT: ret float 0x380FFFFFC0000000 +; + %res = call float @llvm.nvvm.fmin.f(float 0x380FFFFFC0000000, float undef) + ret float %res +} + +define float @test_fmin_subnorm_undef_ftz_f() { +; CHECK-LABEL: define float @test_fmin_subnorm_undef_ftz_f() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.fmin.ftz.f(float 0x380FFFFFC0000000, float undef) + ret float %res +} + +define float @test_fmin_subnorm_undef_ftz_nan_f() { +; CHECK-LABEL: define float @test_fmin_subnorm_undef_ftz_nan_f() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.fmin.ftz.nan.f(float 0x380FFFFFC0000000, float undef) + ret float %res +} + +define float @test_fmin_subnorm_undef_ftz_nan_xorsign_abs_f() { +; CHECK-LABEL: define float @test_fmin_subnorm_undef_ftz_nan_xorsign_abs_f() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.fmin.ftz.nan.xorsign.abs.f(float 0x380FFFFFC0000000, float undef) + ret float %res +} + +define float @test_fmin_subnorm_undef_ftz_xorsign_abs_f() { +; CHECK-LABEL: define float @test_fmin_subnorm_undef_ftz_xorsign_abs_f() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = call float @llvm.nvvm.fmin.ftz.xorsign.abs.f(float 0x380FFFFFC0000000, float undef) + ret float %res +} + +define float @test_fmin_subnorm_undef_nan_f() { +; CHECK-LABEL: define float @test_fmin_subnorm_undef_nan_f() { +; CHECK-NEXT: ret float 0x380FFFFFC0000000 +; + %res = call float @llvm.nvvm.fmin.nan.f(float 0x380FFFFFC0000000, float undef) + ret float %res +} + +define float @test_fmin_subnorm_undef_nan_xorsign_abs_f() { +; CHECK-LABEL: define float @test_fmin_subnorm_undef_nan_xorsign_abs_f() { +; CHECK-NEXT: ret float 0x380FFFFFC0000000 +; + %res = call float @llvm.nvvm.fmin.nan.xorsign.abs.f(float 0x380FFFFFC0000000, float undef) + ret float %res +} + +define float @test_fmin_subnorm_undef_xorsign_abs_f() { +; CHECK-LABEL: define float @test_fmin_subnorm_undef_xorsign_abs_f() { +; CHECK-NEXT: ret float 0x380FFFFFC0000000 +; + %res = call float @llvm.nvvm.fmin.xorsign.abs.f(float 0x380FFFFFC0000000, float undef) + ret float %res +} + +;############################################################### +;# FMin(NaN, undef) # +;############################################################### +; Ensure we canonicalize the NaNs for f32 + +define double @test_fmin_nan_undef_d() { +; CHECK-LABEL: define double @test_fmin_nan_undef_d() { +; CHECK-NEXT: ret double 0x7FF4444400000000 +; + %res = call double @llvm.nvvm.fmin.d(double 0x7ff4444400000000, double undef) + ret double %res +} + +define float @test_fmin_nan_undef_f() { +; CHECK-LABEL: define float @test_fmin_nan_undef_f() { +; CHECK-NEXT: ret float 0x7FFFFFFFE0000000 +; + %res = call float @llvm.nvvm.fmin.f(float 0x7fff444400000000, float undef) + ret float %res +} + +define float @test_fmin_nan_undef_ftz_f() { +; CHECK-LABEL: define float @test_fmin_nan_undef_ftz_f() { +; CHECK-NEXT: ret float 0x7FFFFFFFE0000000 +; + %res = call float @llvm.nvvm.fmin.ftz.f(float 0x7fff444400000000, float undef) + ret float %res +} + +define float @test_fmin_nan_undef_ftz_nan_f() { +; CHECK-LABEL: define float @test_fmin_nan_undef_ftz_nan_f() { +; CHECK-NEXT: ret float 0x7FFFFFFFE0000000 +; + %res = call float @llvm.nvvm.fmin.ftz.nan.f(float 0x7fff444400000000, float undef) + ret float %res +} + +define float @test_fmin_nan_undef_ftz_nan_xorsign_abs_f() { +; CHECK-LABEL: define float @test_fmin_nan_undef_ftz_nan_xorsign_abs_f() { +; CHECK-NEXT: ret float 0x7FFFFFFFE0000000 +; + %res = call float @llvm.nvvm.fmin.ftz.nan.xorsign.abs.f(float 0x7fff444400000000, float undef) + ret float %res +} + +define float @test_fmin_nan_undef_ftz_xorsign_abs_f() { +; CHECK-LABEL: define float @test_fmin_nan_undef_ftz_xorsign_abs_f() { +; CHECK-NEXT: ret float 0x7FFFFFFFE0000000 +; + %res = call float @llvm.nvvm.fmin.ftz.xorsign.abs.f(float 0x7ffff4ff00000000, float undef) + ret float %res +} + +define float @test_fmin_nan_undef_nan_f() { +; CHECK-LABEL: define float @test_fmin_nan_undef_nan_f() { +; CHECK-NEXT: ret float 0x7FFFFFFFE0000000 +; + %res = call float @llvm.nvvm.fmin.nan.f(float 0x7fff444400000000, float undef) + ret float %res +} + +define float @test_fmin_nan_undef_nan_xorsign_abs_f() { +; CHECK-LABEL: define float @test_fmin_nan_undef_nan_xorsign_abs_f() { +; CHECK-NEXT: ret float 0x7FFFFFFFE0000000 +; + %res = call float @llvm.nvvm.fmin.nan.xorsign.abs.f(float 0x7fff444400000000, float undef) + ret float %res +} + +define float @test_fmin_nan_undef_xorsign_abs_f() { +; CHECK-LABEL: define float @test_fmin_nan_undef_xorsign_abs_f() { +; CHECK-NEXT: ret float 0x7FFFFFFFE0000000 +; + %res = call float @llvm.nvvm.fmin.xorsign.abs.f(float 0x7fff444400000000, float undef) + ret float %res +} |