From 315c88c5fbdb2b27cebf23c87fb502f7a567d84b Mon Sep 17 00:00:00 2001
From: Slava Zakharin <szakharin@nvidia.com>
Date: Wed, 3 Apr 2024 10:19:06 -0700
Subject: [flang] Fixed MODULO(x, inf) to produce NaN. (#86145)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Straightforward computation of `A − FLOOR (A / P) * P` should
produce NaN, when P is infinity. The -menable-no-infs lowering
can still use the relaxed operations sequence.
---
 flang/lib/Optimizer/Builder/IntrinsicCall.cpp   |  5 ++-
 flang/lib/Optimizer/Builder/Runtime/Numeric.cpp | 22 +++++++++++-
 flang/runtime/numeric-templates.h               | 29 +++++++++++++---
 flang/test/Lower/Intrinsics/modulo.f90          | 18 +++++-----
 flang/unittests/Runtime/Numeric.cpp             | 46 +++++++++++++++++++++++++
 5 files changed, 105 insertions(+), 15 deletions(-)
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 069ba81..5f6de94 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -5259,9 +5259,12 @@ mlir::Value IntrinsicLibrary::genModulo(mlir::Type resultType,
                                                  remainder);
   }
 
+  auto fastMathFlags = builder.getFastMathFlags();
   // F128 arith::RemFOp may be lowered to a runtime call that may be unsupported
   // on the target, so generate a call to Fortran Runtime's ModuloReal16.
-  if (resultType == mlir::FloatType::getF128(builder.getContext()))
+  if (resultType == mlir::FloatType::getF128(builder.getContext()) ||
+      (fastMathFlags & mlir::arith::FastMathFlags::ninf) ==
+          mlir::arith::FastMathFlags::none)
     return builder.createConvert(
         loc, resultType,
         fir::runtime::genModulo(builder, loc, args[0], args[1]));
diff --git a/flang/lib/Optimizer/Builder/Runtime/Numeric.cpp b/flang/lib/Optimizer/Builder/Runtime/Numeric.cpp
index 4dcbd13..81d5d21 100644
--- a/flang/lib/Optimizer/Builder/Runtime/Numeric.cpp
+++ b/flang/lib/Optimizer/Builder/Runtime/Numeric.cpp
@@ -118,6 +118,20 @@ struct ForcedMod16 {
   }
 };
 
+/// Placeholder for real*10 version of Modulo Intrinsic
+struct ForcedModulo10 {
+  static constexpr const char *name = ExpandAndQuoteKey(RTNAME(ModuloReal10));
+  static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() {
+    return [](mlir::MLIRContext *ctx) {
+      auto fltTy = mlir::FloatType::getF80(ctx);
+      auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8));
+      auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int));
+      return mlir::FunctionType::get(ctx, {fltTy, fltTy, strTy, intTy},
+                                     {fltTy});
+    };
+  }
+};
+
 /// Placeholder for real*16 version of Modulo Intrinsic
 struct ForcedModulo16 {
   static constexpr const char *name = ExpandAndQuoteKey(RTNAME(ModuloReal16));
@@ -349,7 +363,13 @@ mlir::Value fir::runtime::genModulo(fir::FirOpBuilder &builder,
 
   // MODULO is lowered into math operations in intrinsics lowering,
   // so genModulo() should only be used for F128 data type now.
-  if (fltTy.isF128())
+  if (fltTy.isF32())
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ModuloReal4)>(loc, builder);
+  else if (fltTy.isF64())
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ModuloReal8)>(loc, builder);
+  else if (fltTy.isF80())
+    func = fir::runtime::getRuntimeFunc<ForcedModulo10>(loc, builder);
+  else if (fltTy.isF128())
     func = fir::runtime::getRuntimeFunc<ForcedModulo16>(loc, builder);
   else
     fir::intrinsicTypeTODO(builder, fltTy, loc, "MODULO");
diff --git a/flang/runtime/numeric-templates.h b/flang/runtime/numeric-templates.h
index af552f9..4936e77 100644
--- a/flang/runtime/numeric-templates.h
+++ b/flang/runtime/numeric-templates.h
@@ -237,8 +237,12 @@ inline RT_API_ATTRS T RealMod(
   if (ISNANTy<T>::compute(a) || ISNANTy<T>::compute(p) ||
       ISINFTy<T>::compute(a)) {
     return QNANTy<T>::compute();
-  } else if (ISINFTy<T>::compute(p)) {
-    return a;
+  } else if (IS_MODULO && ISINFTy<T>::compute(p)) {
+    // Other compilers behave consistently for MOD(x, +/-INF)
+    // and always return x. This is probably related to
+    // implementation of std::fmod(). Stick to this behavior
+    // for MOD, but return NaN for MODULO(x, +/-INF).
+    return QNANTy<T>::compute();
   }
   T aAbs{ABSTy<T>::compute(a)};
   T pAbs{ABSTy<T>::compute(p)};
@@ -248,8 +252,19 @@ inline RT_API_ATTRS T RealMod(
       if (auto pInt{static_cast<std::int64_t>(p)}; p == pInt) {
         // Fast exact case for integer operands
         auto mod{aInt - (aInt / pInt) * pInt};
-        if (IS_MODULO && (aInt > 0) != (pInt > 0)) {
-          mod += pInt;
+        if constexpr (IS_MODULO) {
+          if (mod == 0) {
+            // Return properly signed zero.
+            return pInt > 0 ? T{0} : -T{0};
+          }
+          if ((aInt > 0) != (pInt > 0)) {
+            mod += pInt;
+          }
+        } else {
+          if (mod == 0) {
+            // Return properly signed zero.
+            return aInt > 0 ? T{0} : -T{0};
+          }
         }
         return static_cast<T>(mod);
       }
@@ -297,7 +312,11 @@ inline RT_API_ATTRS T RealMod(
     }
     if constexpr (IS_MODULO) {
       if ((a < 0) != (p < 0)) {
-        tmp += p;
+        if (tmp == 0.) {
+          tmp = -tmp;
+        } else {
+          tmp += p;
+        }
       }
     }
     return tmp;
diff --git a/flang/test/Lower/Intrinsics/modulo.f90 b/flang/test/Lower/Intrinsics/modulo.f90
index 383cb34..ac18e59 100644
--- a/flang/test/Lower/Intrinsics/modulo.f90
+++ b/flang/test/Lower/Intrinsics/modulo.f90
@@ -1,11 +1,13 @@
-! RUN: bbc -emit-fir -hlfir=false %s -o - | FileCheck %s
+! RUN: bbc -emit-fir -hlfir=false %s -o - | FileCheck %s -check-prefixes=HONORINF,ALL
+! RUN: flang-new -fc1 -menable-no-infs -emit-fir -flang-deprecated-no-hlfir %s -o - | FileCheck %s -check-prefixes=CHECK,ALL
 
-! CHECK-LABEL: func @_QPmodulo_testr(
-! CHECK-SAME: %[[arg0:.*]]: !fir.ref<f64>{{.*}}, %[[arg1:.*]]: !fir.ref<f64>{{.*}}, %[[arg2:.*]]: !fir.ref<f64>{{.*}}) {
+! ALL-LABEL: func @_QPmodulo_testr(
+! ALL-SAME: %[[arg0:.*]]: !fir.ref<f64>{{.*}}, %[[arg1:.*]]: !fir.ref<f64>{{.*}}, %[[arg2:.*]]: !fir.ref<f64>{{.*}}) {
 subroutine modulo_testr(r, a, p)
   real(8) :: r, a, p
-  ! CHECK-DAG: %[[a:.*]] = fir.load %[[arg1]] : !fir.ref<f64>
-  ! CHECK-DAG: %[[p:.*]] = fir.load %[[arg2]] : !fir.ref<f64>
+  ! ALL-DAG: %[[a:.*]] = fir.load %[[arg1]] : !fir.ref<f64>
+  ! ALL-DAG: %[[p:.*]] = fir.load %[[arg2]] : !fir.ref<f64>
+  ! HONORINF: %[[res:.*]] = fir.call @_FortranAModuloReal8(%[[a]], %[[p]]
   ! CHECK-DAG: %[[rem:.*]] = arith.remf %[[a]], %[[p]] {{.*}}: f64
   ! CHECK-DAG: %[[zero:.*]] = arith.constant 0.000000e+00 : f64
   ! CHECK-DAG: %[[remNotZero:.*]] = arith.cmpf une, %[[rem]], %[[zero]] {{.*}} : f64
@@ -15,12 +17,12 @@ subroutine modulo_testr(r, a, p)
   ! CHECK-DAG: %[[mustAddP:.*]] = arith.andi %[[remNotZero]], %[[signDifferent]] : i1
   ! CHECK-DAG: %[[remPlusP:.*]] = arith.addf %[[rem]], %[[p]] {{.*}}: f64
   ! CHECK: %[[res:.*]] = arith.select %[[mustAddP]], %[[remPlusP]], %[[rem]] : f64
-  ! CHECK: fir.store %[[res]] to %[[arg0]] : !fir.ref<f64>
+  ! ALL: fir.store %[[res]] to %[[arg0]] : !fir.ref<f64>
   r = modulo(a, p)
 end subroutine
   
-! CHECK-LABEL: func @_QPmodulo_testi(
-! CHECK-SAME: %[[arg0:.*]]: !fir.ref<i64>{{.*}}, %[[arg1:.*]]: !fir.ref<i64>{{.*}}, %[[arg2:.*]]: !fir.ref<i64>{{.*}}) {
+! ALL-LABEL: func @_QPmodulo_testi(
+! ALL-SAME: %[[arg0:.*]]: !fir.ref<i64>{{.*}}, %[[arg1:.*]]: !fir.ref<i64>{{.*}}, %[[arg2:.*]]: !fir.ref<i64>{{.*}}) {
 subroutine modulo_testi(r, a, p)
   integer(8) :: r, a, p
   ! CHECK-DAG: %[[a:.*]] = fir.load %[[arg1]] : !fir.ref<i64>
diff --git a/flang/unittests/Runtime/Numeric.cpp b/flang/unittests/Runtime/Numeric.cpp
index 43263d1..b69ff21 100644
--- a/flang/unittests/Runtime/Numeric.cpp
+++ b/flang/unittests/Runtime/Numeric.cpp
@@ -65,6 +65,30 @@ TEST(Numeric, Mod) {
   EXPECT_EQ(RTNAME(ModReal4)(Real<4>{-8.0}, Real<4>(5.0)), -3.0);
   EXPECT_EQ(RTNAME(ModReal8)(Real<8>{8.0}, Real<8>(-5.0)), 3.0);
   EXPECT_EQ(RTNAME(ModReal8)(Real<8>{-8.0}, Real<8>(-5.0)), -3.0);
+  EXPECT_EQ(
+      RTNAME(ModReal4)(Real<4>{0.5}, std::numeric_limits<Real<4>>::infinity()),
+      0.5);
+  EXPECT_EQ(
+      RTNAME(ModReal4)(Real<4>{-0.5}, std::numeric_limits<Real<4>>::infinity()),
+      -0.5);
+  EXPECT_EQ(
+      RTNAME(ModReal4)(Real<4>{0.5}, -std::numeric_limits<Real<4>>::infinity()),
+      0.5);
+  EXPECT_EQ(RTNAME(ModReal4)(
+                Real<4>{-0.5}, -std::numeric_limits<Real<4>>::infinity()),
+      -0.5);
+  EXPECT_EQ(
+      RTNAME(ModReal8)(Real<8>{0.5}, std::numeric_limits<Real<8>>::infinity()),
+      0.5);
+  EXPECT_EQ(
+      RTNAME(ModReal8)(Real<8>{-0.5}, std::numeric_limits<Real<8>>::infinity()),
+      -0.5);
+  EXPECT_EQ(
+      RTNAME(ModReal8)(Real<8>{0.5}, -std::numeric_limits<Real<8>>::infinity()),
+      0.5);
+  EXPECT_EQ(RTNAME(ModReal8)(
+                Real<8>{-0.5}, -std::numeric_limits<Real<8>>::infinity()),
+      -0.5);
 }
 
 TEST(Numeric, Modulo) {
@@ -76,6 +100,28 @@ TEST(Numeric, Modulo) {
   EXPECT_EQ(RTNAME(ModuloReal4)(Real<4>{-8.0}, Real<4>(5.0)), 2.0);
   EXPECT_EQ(RTNAME(ModuloReal8)(Real<8>{8.0}, Real<8>(-5.0)), -2.0);
   EXPECT_EQ(RTNAME(ModuloReal8)(Real<8>{-8.0}, Real<8>(-5.0)), -3.0);
+  // MODULO(x, INF) == NaN
+  EXPECT_TRUE(std::isnan(RTNAME(ModuloReal4)(
+      Real<4>{0.5}, std::numeric_limits<Real<4>>::infinity())));
+  EXPECT_TRUE(std::isnan(RTNAME(ModuloReal4)(
+      Real<4>{-0.5}, std::numeric_limits<Real<4>>::infinity())));
+  EXPECT_TRUE(std::isnan(RTNAME(ModuloReal4)(
+      Real<4>{0.5}, -std::numeric_limits<Real<4>>::infinity())));
+  EXPECT_TRUE(std::isnan(RTNAME(ModuloReal4)(
+      Real<4>{-0.5}, -std::numeric_limits<Real<4>>::infinity())));
+  EXPECT_TRUE(std::isnan(RTNAME(ModuloReal8)(
+      Real<8>{-0.5}, std::numeric_limits<Real<8>>::infinity())));
+  EXPECT_TRUE(std::isnan(RTNAME(ModuloReal8)(
+      Real<8>{0.5}, std::numeric_limits<Real<8>>::infinity())));
+  EXPECT_TRUE(std::isnan(RTNAME(ModuloReal8)(
+      Real<8>{-0.5}, -std::numeric_limits<Real<8>>::infinity())));
+  EXPECT_TRUE(std::isnan(RTNAME(ModuloReal8)(
+      Real<8>{0.5}, -std::numeric_limits<Real<8>>::infinity())));
+  // MODULO(x, y) for integer values of x and y with 0 remainder.
+  EXPECT_EQ(RTNAME(ModuloReal4)(Real<4>{5.0}, Real<4>(1.0)), 0.0);
+  EXPECT_EQ(RTNAME(ModuloReal4)(Real<4>{5.0}, Real<4>(-1.0)), -0.0);
+  EXPECT_EQ(RTNAME(ModuloReal4)(Real<4>{-5.0}, Real<4>(1.0)), 0.0);
+  EXPECT_EQ(RTNAME(ModuloReal4)(Real<4>{-5.0}, Real<4>(-1.0)), -0.0);
 }
 
 TEST(Numeric, Nearest) {
-- 
cgit v1.1