[InstCombine] canonicalize fmin/fmax to LLVM intrinsics minnum/maxnum

This transform came up in D62414, but we should deal with it first. We have LLVM intrinsics that correspond exactly to libm calls (unlike most libm calls, these libm calls never set errno). This holds without any fast-math-flags, so we should always canonicalize to those intrinsics directly for better optimization. Currently, we convert to fcmp+select only when we have FMF (nnan) because fcmp+select does not preserve the semantics of the call in the general case. Differential Revision: https://reviews.llvm.org/D63214 llvm-svn: 364714
author: Sanjay Patel <spatel@rotateright.com> 2019-06-29 14:28:54 +0000
committer: Sanjay Patel <spatel@rotateright.com> 2019-06-29 14:28:54 +0000
commit: 77dc1e85683c585a08982af745f07647624eda1b (patch)
tree: 3ca036c1b1d586b3e5b116a95d7e761fc1c80a64 /llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
parent: 61a8b62b4c961f25410fc126880befdf85424ea1 (diff)
download: llvm-77dc1e85683c585a08982af745f07647624eda1b.zip
llvm-77dc1e85683c585a08982af745f07647624eda1b.tar.gz
llvm-77dc1e85683c585a08982af745f07647624eda1b.tar.bz2
1 files changed, 14 insertions, 24 deletions
diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 0c95f4c..b5f8b39 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -1563,40 +1563,30 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) {
 }
 
 Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) {
-  Function *Callee = CI->getCalledFunction();
   // If we can shrink the call to a float function rather than a double
   // function, do that first.
+  Function *Callee = CI->getCalledFunction();
   StringRef Name = Callee->getName();
   if ((Name == "fmin" || Name == "fmax") && hasFloatVersion(Name))
     if (Value *Ret = optimizeBinaryDoubleFP(CI, B))
       return Ret;
 
+  // The LLVM intrinsics minnum/maxnum correspond to fmin/fmax. Canonicalize to
+  // the intrinsics for improved optimization (for example, vectorization).
+  // No-signed-zeros is implied by the definitions of fmax/fmin themselves.
+  // From the C standard draft WG14/N1256:
+  // "Ideally, fmax would be sensitive to the sign of zero, for example
+  // fmax(-0.0, +0.0) would return +0; however, implementation in software
+  // might be impractical."
   IRBuilder<>::FastMathFlagGuard Guard(B);
-  FastMathFlags FMF;
-  if (CI->isFast()) {
-    // If the call is 'fast', then anything we create here will also be 'fast'.
-    FMF.setFast();
-  } else {
-    // At a minimum, no-nans-fp-math must be true.
-    if (!CI->hasNoNaNs())
-      return nullptr;
-    // No-signed-zeros is implied by the definitions of fmax/fmin themselves:
-    // "Ideally, fmax would be sensitive to the sign of zero, for example
-    // fmax(-0. 0, +0. 0) would return +0; however, implementation in software
-    // might be impractical."
-    FMF.setNoSignedZeros();
-    FMF.setNoNaNs();
-  }
+  FastMathFlags FMF = CI->getFastMathFlags();
+  FMF.setNoSignedZeros();
   B.setFastMathFlags(FMF);
 
-  // We have a relaxed floating-point environment. We can ignore NaN-handling
-  // and transform to a compare and select. We do not have to consider errno or
-  // exceptions, because fmin/fmax do not have those.
-  Value *Op0 = CI->getArgOperand(0);
-  Value *Op1 = CI->getArgOperand(1);
-  Value *Cmp = Callee->getName().startswith("fmin") ?
-    B.CreateFCmpOLT(Op0, Op1) : B.CreateFCmpOGT(Op0, Op1);
-  return B.CreateSelect(Cmp, Op0, Op1);
+  Intrinsic::ID IID = Callee->getName().startswith("fmin") ? Intrinsic::minnum
+                                                           : Intrinsic::maxnum;
+  Function *F = Intrinsic::getDeclaration(CI->getModule(), IID, CI->getType());
+  return B.CreateCall(F, { CI->getArgOperand(0), CI->getArgOperand(1) });
 }
 
 Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) {
author	Sanjay Patel <spatel@rotateright.com>	2019-06-29 14:28:54 +0000
committer	Sanjay Patel <spatel@rotateright.com>	2019-06-29 14:28:54 +0000
commit	77dc1e85683c585a08982af745f07647624eda1b (patch)
tree	3ca036c1b1d586b3e5b116a95d7e761fc1c80a64 /llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
parent	61a8b62b4c961f25410fc126880befdf85424ea1 (diff)
download	llvm-77dc1e85683c585a08982af745f07647624eda1b.zip llvm-77dc1e85683c585a08982af745f07647624eda1b.tar.gz llvm-77dc1e85683c585a08982af745f07647624eda1b.tar.bz2