diff options
author | Lewis Crawford <lcrawford@nvidia.com> | 2025-07-21 17:48:45 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-07-21 17:48:45 +0100 |
commit | 0823f4ff086e5352f7543b68ce6e7823498cf44b (patch) | |
tree | bda0df2595baca63fc73a9d8f7382be492812305 /llvm/lib/Analysis/ConstantFolding.cpp | |
parent | f85c1a5615c87f4598c6859578c0c30d4ea6a58c (diff) | |
download | llvm-0823f4ff086e5352f7543b68ce6e7823498cf44b.zip llvm-0823f4ff086e5352f7543b68ce6e7823498cf44b.tar.gz llvm-0823f4ff086e5352f7543b68ce6e7823498cf44b.tar.bz2 |
[ConstantFolding] Fix nvvm_round folding on PPC (#149837)
Fix a failing test for constant-folding the nvvm_round intrinsic. The
original implementation added in #141233 used a native libm call to the
"round" function, but on PPC this produces +0.0 if the input is -0.0,
which caused a test failure.
This patch updates it to use APFloat functions instead of native libm
calls to ensure cross-platform consistency.
Diffstat (limited to 'llvm/lib/Analysis/ConstantFolding.cpp')
-rw-r--r-- | llvm/lib/Analysis/ConstantFolding.cpp | 13 |
1 file changed, 8 insertions, 5 deletions
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index f5a88b6..e71ba5e 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -2677,11 +2677,14 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
 
     case Intrinsic::nvvm_round_ftz_f:
     case Intrinsic::nvvm_round_f:
-    case Intrinsic::nvvm_round_d:
-      return ConstantFoldFP(
-          round, APF, Ty,
-          nvvm::GetNVVMDenromMode(
-              nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID)));
+    case Intrinsic::nvvm_round_d: {
+      // Use APFloat implementation instead of native libm call, as some
+      // implementations (e.g. on PPC) do not preserve the sign of negative 0.
+      bool IsFTZ = nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID);
+      auto V = IsFTZ ? FTZPreserveSign(APF) : APF;
+      V.roundToIntegral(APFloat::rmNearestTiesToAway);
+      return ConstantFP::get(Ty->getContext(), V);
+    }
 
     case Intrinsic::nvvm_saturate_ftz_f:
     case Intrinsic::nvvm_saturate_d: