diff options
author | Michael Klemm <michael.klemm@amd.com> | 2024-07-02 18:37:33 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-07-02 18:37:33 +0200 |
commit | 7359edbc0981280e029701aa1ddee7ed313126dc (patch) | |
tree | 6fb3827cd88c3c9790612b22e7662295fc7e7642 | |
parent | e414bf9fffcb9b6010c7eb08406696a9de931d66 (diff) | |
download | llvm-7359edbc0981280e029701aa1ddee7ed313126dc.zip llvm-7359edbc0981280e029701aa1ddee7ed313126dc.tar.gz llvm-7359edbc0981280e029701aa1ddee7ed313126dc.tar.bz2 |
[Flang][runtime] Distinguish CPU time and elapsed time for cpu_time and system_clock (#96652)
The current implementation of `system_clock()` returns CPU time
instead of elapsed wall-clock time. This PR fixes the issue so that
`system_clock()` correctly returns elapsed time.
-rw-r--r-- | flang/runtime/time-intrinsic.cpp | 113 | ||||
-rw-r--r-- | flang/test/Runtime/no-cpp-dep.c | 2 |
2 files changed, 64 insertions, 51 deletions
diff --git a/flang/runtime/time-intrinsic.cpp b/flang/runtime/time-intrinsic.cpp index a141fe63..ac372b2 100644 --- a/flang/runtime/time-intrinsic.cpp +++ b/flang/runtime/time-intrinsic.cpp @@ -64,20 +64,29 @@ template <typename Unused = void> double GetCpuTime(fallback_implementation) { // clock_gettime is implemented in the pthread library for MinGW. // Using it here would mean that all programs that link libFortranRuntime are // required to also link to pthread. Instead, don't use the function. -#undef CLOCKID -#elif defined CLOCK_PROCESS_CPUTIME_ID -#define CLOCKID CLOCK_PROCESS_CPUTIME_ID +#undef CLOCKID_CPU_TIME +#undef CLOCKID_ELAPSED_TIME +#else +// Determine what clock to use for CPU time. +#if defined CLOCK_PROCESS_CPUTIME_ID +#define CLOCKID_CPU_TIME CLOCK_PROCESS_CPUTIME_ID #elif defined CLOCK_THREAD_CPUTIME_ID -#define CLOCKID CLOCK_THREAD_CPUTIME_ID -#elif defined CLOCK_MONOTONIC -#define CLOCKID CLOCK_MONOTONIC +#define CLOCKID_CPU_TIME CLOCK_THREAD_CPUTIME_ID +#else +#undef CLOCKID_CPU_TIME +#endif + +// Determine what clock to use for elapsed time. +#if defined CLOCK_MONOTONIC +#define CLOCKID_ELAPSED_TIME CLOCK_MONOTONIC #elif defined CLOCK_REALTIME -#define CLOCKID CLOCK_REALTIME +#define CLOCKID_ELAPSED_TIME CLOCK_REALTIME #else -#undef CLOCKID +#undef CLOCKID_ELAPSED_TIME +#endif #endif -#ifdef CLOCKID +#ifdef CLOCKID_CPU_TIME // POSIX implementation using clock_gettime. This is only enabled where // clock_gettime is available. template <typename T = int, typename U = struct timespec> @@ -86,17 +95,26 @@ double GetCpuTime(preferred_implementation, T ClockId = 0, U *Timespec = nullptr, decltype(clock_gettime(ClockId, Timespec)) *Enabled = nullptr) { struct timespec tspec; - if (clock_gettime(CLOCKID, &tspec) == 0) { + if (clock_gettime(CLOCKID_CPU_TIME, &tspec) == 0) { return tspec.tv_nsec * 1.0e-9 + tspec.tv_sec; } // Return some negative value to represent failure. 
return -1.0; } -#endif +#endif // CLOCKID_CPU_TIME using count_t = std::int64_t; using unsigned_count_t = std::uint64_t; +// POSIX implementation using clock_gettime where available. The clock_gettime +// result is in nanoseconds, which is converted as necessary to +// - deciseconds for kind 1 +// - milliseconds for kinds 2, 4 +// - nanoseconds for kinds 8, 16 +constexpr unsigned_count_t DS_PER_SEC{10u}; +constexpr unsigned_count_t MS_PER_SEC{1'000u}; +constexpr unsigned_count_t NS_PER_SEC{1'000'000'000u}; + // Computes HUGE(INT(0,kind)) as an unsigned integer value. static constexpr inline unsigned_count_t GetHUGE(int kind) { if (kind > 8) { @@ -105,51 +123,49 @@ static constexpr inline unsigned_count_t GetHUGE(int kind) { return (unsigned_count_t{1} << ((8 * kind) - 1)) - 1; } -// This is the fallback implementation, which should work everywhere. Note that -// in general we can't recover after std::clock has reached its maximum value. +// Function converts a std::timespec_t into the desired count to +// be returned by the timing functions in accordance with the requested +// kind at the call site. +count_t ConvertTimeSpecToCount(int kind, const std::timespec &tspec) { + const unsigned_count_t huge{GetHUGE(kind)}; + unsigned_count_t sec{static_cast<unsigned_count_t>(tspec.tv_sec)}; + unsigned_count_t nsec{static_cast<unsigned_count_t>(tspec.tv_nsec)}; + if (kind >= 8) { + return (sec * NS_PER_SEC + nsec) % (huge + 1); + } else if (kind >= 2) { + return (sec * MS_PER_SEC + (nsec / (NS_PER_SEC / MS_PER_SEC))) % (huge + 1); + } else { // kind == 1 + return (sec * DS_PER_SEC + (nsec / (NS_PER_SEC / DS_PER_SEC))) % (huge + 1); + } +} + +// This is the fallback implementation, which should work everywhere. 
template <typename Unused = void> count_t GetSystemClockCount(int kind, fallback_implementation) { - std::clock_t timestamp{std::clock()}; - if (timestamp == static_cast<std::clock_t>(-1)) { + std::timespec tspec; + + if (std::timespec_get(&tspec, TIME_UTC) < 0) { // Return -HUGE(COUNT) to represent failure. return -static_cast<count_t>(GetHUGE(kind)); } - // Convert the timestamp to std::uint64_t with wrap-around. The timestamp is - // most likely a floating-point value (since C'11), so compute the modulus - // carefully when one is required. - constexpr auto maxUnsignedCount{std::numeric_limits<unsigned_count_t>::max()}; - if constexpr (std::numeric_limits<std::clock_t>::max() > maxUnsignedCount) { - timestamp -= maxUnsignedCount * std::floor(timestamp / maxUnsignedCount); - } - unsigned_count_t unsignedCount{static_cast<unsigned_count_t>(timestamp)}; - // Return the modulus of the unsigned integral count with HUGE(COUNT)+1. - // The result is a signed integer but never negative. - return static_cast<count_t>(unsignedCount % (GetHUGE(kind) + 1)); + + // Compute the timestamp as seconds plus nanoseconds in accordance + // with the requested kind at the call site. + return ConvertTimeSpecToCount(kind, tspec); } template <typename Unused = void> count_t GetSystemClockCountRate(int kind, fallback_implementation) { - return CLOCKS_PER_SEC; + return kind >= 8 ? NS_PER_SEC : kind >= 2 ? MS_PER_SEC : DS_PER_SEC; } template <typename Unused = void> count_t GetSystemClockCountMax(int kind, fallback_implementation) { - constexpr auto max_clock_t{std::numeric_limits<std::clock_t>::max()}; unsigned_count_t maxCount{GetHUGE(kind)}; - return max_clock_t <= maxCount ? static_cast<count_t>(max_clock_t) - : static_cast<count_t>(maxCount); + return maxCount; } -// POSIX implementation using clock_gettime where available. 
The clock_gettime -// result is in nanoseconds, which is converted as necessary to -// - deciseconds for kind 1 -// - milliseconds for kinds 2, 4 -// - nanoseconds for kinds 8, 16 -constexpr unsigned_count_t DS_PER_SEC{10u}; -constexpr unsigned_count_t MS_PER_SEC{1'000u}; -constexpr unsigned_count_t NS_PER_SEC{1'000'000'000u}; - -#ifdef CLOCKID +#ifdef CLOCKID_ELAPSED_TIME template <typename T = int, typename U = struct timespec> count_t GetSystemClockCount(int kind, preferred_implementation, // We need some dummy parameters to pass to decltype(clock_gettime). @@ -157,20 +173,15 @@ count_t GetSystemClockCount(int kind, preferred_implementation, decltype(clock_gettime(ClockId, Timespec)) *Enabled = nullptr) { struct timespec tspec; const unsigned_count_t huge{GetHUGE(kind)}; - if (clock_gettime(CLOCKID, &tspec) != 0) { + if (clock_gettime(CLOCKID_ELAPSED_TIME, &tspec) != 0) { return -huge; // failure } - unsigned_count_t sec{static_cast<unsigned_count_t>(tspec.tv_sec)}; - unsigned_count_t nsec{static_cast<unsigned_count_t>(tspec.tv_nsec)}; - if (kind >= 8) { - return (sec * NS_PER_SEC + nsec) % (huge + 1); - } else if (kind >= 2) { - return (sec * MS_PER_SEC + (nsec / (NS_PER_SEC / MS_PER_SEC))) % (huge + 1); - } else { // kind == 1 - return (sec * DS_PER_SEC + (nsec / (NS_PER_SEC / DS_PER_SEC))) % (huge + 1); - } + + // Compute the timestamp as seconds plus nanoseconds in accordance + // with the requested kind at the call site. 
+ return ConvertTimeSpecToCount(kind, tspec); } -#endif +#endif // CLOCKID_ELAPSED_TIME template <typename T = int, typename U = struct timespec> count_t GetSystemClockCountRate(int kind, preferred_implementation, diff --git a/flang/test/Runtime/no-cpp-dep.c b/flang/test/Runtime/no-cpp-dep.c index 654bebe..606a5d1 100644 --- a/flang/test/Runtime/no-cpp-dep.c +++ b/flang/test/Runtime/no-cpp-dep.c @@ -30,6 +30,7 @@ int32_t RTNAME(ArgumentCount)(); int32_t RTNAME(GetCommandArgument)(int32_t, const struct Descriptor *, const struct Descriptor *, const struct Descriptor *); int32_t RTNAME(GetEnvVariable)(); +int64_t RTNAME(SystemClockCount)(int kind); int main() { double x = RTNAME(CpuTime)(); @@ -37,5 +38,6 @@ int main() { int32_t c = RTNAME(ArgumentCount)(); int32_t v = RTNAME(GetCommandArgument)(0, 0, 0, 0); int32_t e = RTNAME(GetEnvVariable)("FOO", 0, 0); + int64_t t = RTNAME(SystemClockCount)(8); return x + c + v + e; } |