aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Michael Klemm <michael.klemm@amd.com> 2024-07-02 18:37:33 +0200
committer GitHub <noreply@github.com> 2024-07-02 18:37:33 +0200
commit 7359edbc0981280e029701aa1ddee7ed313126dc (patch)
tree 6fb3827cd88c3c9790612b22e7662295fc7e7642
parent e414bf9fffcb9b6010c7eb08406696a9de931d66 (diff)
downloadllvm-7359edbc0981280e029701aa1ddee7ed313126dc.zip
llvm-7359edbc0981280e029701aa1ddee7ed313126dc.tar.gz
llvm-7359edbc0981280e029701aa1ddee7ed313126dc.tar.bz2
[Flang][runtime] Distinguish CPU time and elapsed time for cpu_time and system_clock (#96652)
The current implementation for `system_clock()` returns the CPU time instead of elapsed wallclock time. This PR fixes the issue and makes `system_clock()` correctly return elapsed time.
-rw-r--r-- flang/runtime/time-intrinsic.cpp 113
-rw-r--r-- flang/test/Runtime/no-cpp-dep.c 2
2 files changed, 64 insertions, 51 deletions
diff --git a/flang/runtime/time-intrinsic.cpp b/flang/runtime/time-intrinsic.cpp
index a141fe63..ac372b2 100644
--- a/flang/runtime/time-intrinsic.cpp
+++ b/flang/runtime/time-intrinsic.cpp
@@ -64,20 +64,29 @@ template <typename Unused = void> double GetCpuTime(fallback_implementation) {
// clock_gettime is implemented in the pthread library for MinGW.
// Using it here would mean that all programs that link libFortranRuntime are
// required to also link to pthread. Instead, don't use the function.
-#undef CLOCKID
-#elif defined CLOCK_PROCESS_CPUTIME_ID
-#define CLOCKID CLOCK_PROCESS_CPUTIME_ID
+#undef CLOCKID_CPU_TIME
+#undef CLOCKID_ELAPSED_TIME
+#else
+// Determine what clock to use for CPU time.
+#if defined CLOCK_PROCESS_CPUTIME_ID
+#define CLOCKID_CPU_TIME CLOCK_PROCESS_CPUTIME_ID
#elif defined CLOCK_THREAD_CPUTIME_ID
-#define CLOCKID CLOCK_THREAD_CPUTIME_ID
-#elif defined CLOCK_MONOTONIC
-#define CLOCKID CLOCK_MONOTONIC
+#define CLOCKID_CPU_TIME CLOCK_THREAD_CPUTIME_ID
+#else
+#undef CLOCKID_CPU_TIME
+#endif
+
+// Determine what clock to use for elapsed time.
+#if defined CLOCK_MONOTONIC
+#define CLOCKID_ELAPSED_TIME CLOCK_MONOTONIC
#elif defined CLOCK_REALTIME
-#define CLOCKID CLOCK_REALTIME
+#define CLOCKID_ELAPSED_TIME CLOCK_REALTIME
#else
-#undef CLOCKID
+#undef CLOCKID_ELAPSED_TIME
+#endif
#endif
-#ifdef CLOCKID
+#ifdef CLOCKID_CPU_TIME
// POSIX implementation using clock_gettime. This is only enabled where
// clock_gettime is available.
template <typename T = int, typename U = struct timespec>
@@ -86,17 +95,26 @@ double GetCpuTime(preferred_implementation,
T ClockId = 0, U *Timespec = nullptr,
decltype(clock_gettime(ClockId, Timespec)) *Enabled = nullptr) {
struct timespec tspec;
- if (clock_gettime(CLOCKID, &tspec) == 0) {
+ if (clock_gettime(CLOCKID_CPU_TIME, &tspec) == 0) {
return tspec.tv_nsec * 1.0e-9 + tspec.tv_sec;
}
// Return some negative value to represent failure.
return -1.0;
}
-#endif
+#endif // CLOCKID_CPU_TIME
using count_t = std::int64_t;
using unsigned_count_t = std::uint64_t;
+// Tick rates shared by the clock_gettime and timespec_get implementations.
+// A timespec holds seconds plus nanoseconds, which is converted as necessary to
+// - deciseconds for kind 1
+// - milliseconds for kinds 2, 4
+// - nanoseconds for kinds 8, 16
+constexpr unsigned_count_t DS_PER_SEC{10u};
+constexpr unsigned_count_t MS_PER_SEC{1'000u};
+constexpr unsigned_count_t NS_PER_SEC{1'000'000'000u};
+
// Computes HUGE(INT(0,kind)) as an unsigned integer value.
static constexpr inline unsigned_count_t GetHUGE(int kind) {
if (kind > 8) {
@@ -105,51 +123,49 @@ static constexpr inline unsigned_count_t GetHUGE(int kind) {
return (unsigned_count_t{1} << ((8 * kind) - 1)) - 1;
}
-// This is the fallback implementation, which should work everywhere. Note that
-// in general we can't recover after std::clock has reached its maximum value.
+// Converts a std::timespec into the count to be returned by the
+// timing functions, in accordance with the kind requested at the
+// call site.
+count_t ConvertTimeSpecToCount(int kind, const std::timespec &tspec) {
+ const unsigned_count_t huge{GetHUGE(kind)};
+ unsigned_count_t sec{static_cast<unsigned_count_t>(tspec.tv_sec)};
+ unsigned_count_t nsec{static_cast<unsigned_count_t>(tspec.tv_nsec)};
+ if (kind >= 8) {
+ return (sec * NS_PER_SEC + nsec) % (huge + 1);
+ } else if (kind >= 2) {
+ return (sec * MS_PER_SEC + (nsec / (NS_PER_SEC / MS_PER_SEC))) % (huge + 1);
+ } else { // kind == 1
+ return (sec * DS_PER_SEC + (nsec / (NS_PER_SEC / DS_PER_SEC))) % (huge + 1);
+ }
+}
+
+// This is the fallback implementation, which should work everywhere.
template <typename Unused = void>
count_t GetSystemClockCount(int kind, fallback_implementation) {
- std::clock_t timestamp{std::clock()};
- if (timestamp == static_cast<std::clock_t>(-1)) {
+ std::timespec tspec;
+ // std::timespec_get returns the (nonzero) base on success and 0 on failure.
+ if (std::timespec_get(&tspec, TIME_UTC) == 0) {
// Return -HUGE(COUNT) to represent failure.
return -static_cast<count_t>(GetHUGE(kind));
}
- // Convert the timestamp to std::uint64_t with wrap-around. The timestamp is
- // most likely a floating-point value (since C'11), so compute the modulus
- // carefully when one is required.
- constexpr auto maxUnsignedCount{std::numeric_limits<unsigned_count_t>::max()};
- if constexpr (std::numeric_limits<std::clock_t>::max() > maxUnsignedCount) {
- timestamp -= maxUnsignedCount * std::floor(timestamp / maxUnsignedCount);
- }
- unsigned_count_t unsignedCount{static_cast<unsigned_count_t>(timestamp)};
- // Return the modulus of the unsigned integral count with HUGE(COUNT)+1.
- // The result is a signed integer but never negative.
- return static_cast<count_t>(unsignedCount % (GetHUGE(kind) + 1));
+
+ // Compute the timestamp as seconds plus nanoseconds in accordance
+ // with the requested kind at the call site.
+ return ConvertTimeSpecToCount(kind, tspec);
}
template <typename Unused = void>
count_t GetSystemClockCountRate(int kind, fallback_implementation) {
- return CLOCKS_PER_SEC;
+ return kind >= 8 ? NS_PER_SEC : kind >= 2 ? MS_PER_SEC : DS_PER_SEC;
}
template <typename Unused = void>
count_t GetSystemClockCountMax(int kind, fallback_implementation) {
- constexpr auto max_clock_t{std::numeric_limits<std::clock_t>::max()};
unsigned_count_t maxCount{GetHUGE(kind)};
- return max_clock_t <= maxCount ? static_cast<count_t>(max_clock_t)
- : static_cast<count_t>(maxCount);
+ return maxCount;
}
-// POSIX implementation using clock_gettime where available. The clock_gettime
-// result is in nanoseconds, which is converted as necessary to
-// - deciseconds for kind 1
-// - milliseconds for kinds 2, 4
-// - nanoseconds for kinds 8, 16
-constexpr unsigned_count_t DS_PER_SEC{10u};
-constexpr unsigned_count_t MS_PER_SEC{1'000u};
-constexpr unsigned_count_t NS_PER_SEC{1'000'000'000u};
-
-#ifdef CLOCKID
+#ifdef CLOCKID_ELAPSED_TIME
template <typename T = int, typename U = struct timespec>
count_t GetSystemClockCount(int kind, preferred_implementation,
// We need some dummy parameters to pass to decltype(clock_gettime).
@@ -157,20 +173,15 @@ count_t GetSystemClockCount(int kind, preferred_implementation,
decltype(clock_gettime(ClockId, Timespec)) *Enabled = nullptr) {
struct timespec tspec;
const unsigned_count_t huge{GetHUGE(kind)};
- if (clock_gettime(CLOCKID, &tspec) != 0) {
+ if (clock_gettime(CLOCKID_ELAPSED_TIME, &tspec) != 0) {
return -huge; // failure
}
- unsigned_count_t sec{static_cast<unsigned_count_t>(tspec.tv_sec)};
- unsigned_count_t nsec{static_cast<unsigned_count_t>(tspec.tv_nsec)};
- if (kind >= 8) {
- return (sec * NS_PER_SEC + nsec) % (huge + 1);
- } else if (kind >= 2) {
- return (sec * MS_PER_SEC + (nsec / (NS_PER_SEC / MS_PER_SEC))) % (huge + 1);
- } else { // kind == 1
- return (sec * DS_PER_SEC + (nsec / (NS_PER_SEC / DS_PER_SEC))) % (huge + 1);
- }
+
+ // Compute the timestamp as seconds plus nanoseconds in accordance
+ // with the requested kind at the call site.
+ return ConvertTimeSpecToCount(kind, tspec);
}
-#endif
+#endif // CLOCKID_ELAPSED_TIME
template <typename T = int, typename U = struct timespec>
count_t GetSystemClockCountRate(int kind, preferred_implementation,
diff --git a/flang/test/Runtime/no-cpp-dep.c b/flang/test/Runtime/no-cpp-dep.c
index 654bebe..606a5d1 100644
--- a/flang/test/Runtime/no-cpp-dep.c
+++ b/flang/test/Runtime/no-cpp-dep.c
@@ -30,6 +30,7 @@ int32_t RTNAME(ArgumentCount)();
int32_t RTNAME(GetCommandArgument)(int32_t, const struct Descriptor *,
const struct Descriptor *, const struct Descriptor *);
int32_t RTNAME(GetEnvVariable)();
+int64_t RTNAME(SystemClockCount)(int kind);
int main() {
double x = RTNAME(CpuTime)();
@@ -37,5 +38,6 @@ int main() {
int32_t c = RTNAME(ArgumentCount)();
int32_t v = RTNAME(GetCommandArgument)(0, 0, 0, 0);
int32_t e = RTNAME(GetEnvVariable)("FOO", 0, 0);
+ int64_t t = RTNAME(SystemClockCount)(8);
return x + c + v + e;
}