[ConstantFolding] Add flag to disable call folding (#140270)

Add an optional flag to disable constant-folding for FP function calls. This applies to both intrinsics and libcalls. This is not necessary in most cases, so it is disabled by default, but in cases that require bit-exact precision between the result from constant-folding and run-time execution, having this flag can be useful, and may help with debugging. Cases where mismatches can occur include GPU execution vs host-side folding, cross-compilation scenarios, or compilation vs execution environments with different math library versions. This applies only to calls, rather than all FP arithmetic. Methods such as fast-math-flags can be used to limit reassociation, fma-fusion, etc., and basic arithmetic operations are precisely defined in IEEE 754. However, other math operations such as sqrt, sin, pow, etc., represented by either libcalls or intrinsics, are less well defined, and may vary more between different architectures/library implementations. As this option is not intended for most common use-cases, this patch takes the more conservative approach of disabling constant-folding even for operations like fmax, copysign, fabs, etc. in order to keep the implementation simple, rather than sprinkling checks for this flag throughout. The use-cases for this option are similar to StrictFP, but it is limited to FP call folding, rather than all FP operations, as it is about precise arithmetic results, rather than FP environment behaviours. It can also be used when linking .bc files compiled with different StrictFP settings with llvm-link.
author: Lewis Crawford <lcrawford@nvidia.com> 2025-05-30 11:27:18 +0100
committer: GitHub <noreply@github.com> 2025-05-30 11:27:18 +0100
commit: 1f7885cf9c6801d11491c8c194c999f7223dd141 (patch)
tree: 8749196f5987f3c787d26a1fe398301ef010acdc /llvm/lib/Analysis/ConstantFolding.cpp
parent: 7c996012ceee0997838e3246cce169652b0b6eb6 (diff)
download: llvm-1f7885cf9c6801d11491c8c194c999f7223dd141.zip
llvm-1f7885cf9c6801d11491c8c194c999f7223dd141.tar.gz
llvm-1f7885cf9c6801d11491c8c194c999f7223dd141.tar.bz2
1 files changed, 23 insertions, 3 deletions
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 2476dc5..7dd7f41 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -64,6 +64,11 @@
 
 using namespace llvm;
 
+static cl::opt<bool> DisableFPCallFolding(
+    "disable-fp-call-folding",
+    cl::desc("Disable constant-folding of FP intrinsics and libcalls."),
+    cl::init(false), cl::Hidden);
+
 namespace {
 
 //===----------------------------------------------------------------------===//
@@ -1576,6 +1581,17 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
     return false;
   if (Call->getFunctionType() != F->getFunctionType())
     return false;
+
+  // Allow FP calls (both libcalls and intrinsics) to avoid being folded.
+  // This can be useful for GPU targets or in cross-compilation scenarios
+  // when the exact target FP behaviour is required, and the host compiler's
+  // behaviour may be slightly different from the device's run-time behaviour.
+  if (DisableFPCallFolding && (F->getReturnType()->isFloatingPointTy() ||
+                               any_of(F->args(), [](const Argument &Arg) {
+                                 return Arg.getType()->isFloatingPointTy();
+                               })))
+    return false;
+
   switch (F->getIntrinsicID()) {
   // Operations that do not operate floating-point numbers and do not depend on
   // FP environment can be folded even in strictfp functions.
@@ -1700,7 +1716,6 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
   case Intrinsic::x86_avx512_vcvtsd2usi64:
   case Intrinsic::x86_avx512_cvttsd2usi:
   case Intrinsic::x86_avx512_cvttsd2usi64:
-    return !Call->isStrictFP();
 
   // NVVM FMax intrinsics
   case Intrinsic::nvvm_fmax_d:
@@ -1775,6 +1790,7 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
   case Intrinsic::nvvm_d2ull_rn:
   case Intrinsic::nvvm_d2ull_rp:
   case Intrinsic::nvvm_d2ull_rz:
+    return !Call->isStrictFP();
 
   // Sign operations are actually bitwise operations, they do not raise
   // exceptions even for SNANs.
@@ -3909,8 +3925,12 @@ ConstantFoldStructCall(StringRef Name, Intrinsic::ID IntrinsicID,
 Constant *llvm::ConstantFoldBinaryIntrinsic(Intrinsic::ID ID, Constant *LHS,
                                             Constant *RHS, Type *Ty,
                                             Instruction *FMFSource) {
-  return ConstantFoldIntrinsicCall2(ID, Ty, {LHS, RHS},
-                                    dyn_cast_if_present<CallBase>(FMFSource));
+  auto *Call = dyn_cast_if_present<CallBase>(FMFSource);
+  // Ensure we check flags like StrictFP that might prevent this from getting
+  // folded before generating a result.
+  if (Call && !canConstantFoldCallTo(Call, Call->getCalledFunction()))
+    return nullptr;
+  return ConstantFoldIntrinsicCall2(ID, Ty, {LHS, RHS}, Call);
 }
 
 Constant *llvm::ConstantFoldCall(const CallBase *Call, Function *F,
author	Lewis Crawford <lcrawford@nvidia.com>	2025-05-30 11:27:18 +0100
committer	GitHub <noreply@github.com>	2025-05-30 11:27:18 +0100
commit	1f7885cf9c6801d11491c8c194c999f7223dd141 (patch)
tree	8749196f5987f3c787d26a1fe398301ef010acdc /llvm/lib/Analysis/ConstantFolding.cpp
parent	7c996012ceee0997838e3246cce169652b0b6eb6 (diff)
download	llvm-1f7885cf9c6801d11491c8c194c999f7223dd141.zip llvm-1f7885cf9c6801d11491c8c194c999f7223dd141.tar.gz llvm-1f7885cf9c6801d11491c8c194c999f7223dd141.tar.bz2