//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // For information see https://libcxx.llvm.org/DesignDocs/TimeZone.html #include #include #include #include #include #include #include "include/tzdb/time_zone_private.h" #include "include/tzdb/types_private.h" #include "include/tzdb/tzdb_list_private.h" #include "include/tzdb/tzdb_private.h" // Contains a parser for the IANA time zone data files. // // These files can be found at https://data.iana.org/time-zones/ and are in the // public domain. Information regarding the input can be found at // https://data.iana.org/time-zones/tz-how-to.html and // https://man7.org/linux/man-pages/man8/zic.8.html. // // As indicated at https://howardhinnant.github.io/date/tz.html#Installation // For Windows another file seems to be required // https://raw.githubusercontent.com/unicode-org/cldr/master/common/supplemental/windowsZones.xml // This file seems to contain the mapping of Windows time zone name to IANA // time zone names. // // However this article mentions another way to do the mapping on Windows // https://devblogs.microsoft.com/oldnewthing/20210527-00/?p=105255 // This requires Windows 10 Version 1903, which was released in May of 2019 // and considered end of life in December 2020 // https://learn.microsoft.com/en-us/lifecycle/announcements/windows-10-1903-end-of-servicing // // TODO TZDB Implement the Windows mapping in tzdb::current_zone _LIBCPP_BEGIN_NAMESPACE_STD namespace chrono { // This function is weak so it can be overriden in the tests. The // declaration is in the test header test/support/test_tzdb.h _LIBCPP_WEAK string_view __libcpp_tzdb_directory() { #if defined(__linux__) return "/usr/share/zoneinfo/"; #else # error "unknown path to the IANA Time Zone Database" #endif } //===----------------------------------------------------------------------===// // Details //===----------------------------------------------------------------------===// [[nodiscard]] static bool __is_whitespace(int __c) { return __c == ' ' || __c == '\t'; } static void __skip_optional_whitespace(istream& __input) { while (chrono::__is_whitespace(__input.peek())) __input.get(); } static void __skip_mandatory_whitespace(istream& __input) { if (!chrono::__is_whitespace(__input.get())) std::__throw_runtime_error("corrupt tzdb: expected whitespace"); chrono::__skip_optional_whitespace(__input); } [[nodiscard]] static bool __is_eol(int __c) { return __c == '\n' || __c == std::char_traits::eof(); } static void __skip_line(istream& __input) { while (!chrono::__is_eol(__input.peek())) { __input.get(); } __input.get(); } static void __skip(istream& __input, char __suffix) { if (std::tolower(__input.peek()) == __suffix) __input.get(); } static void __skip(istream& __input, string_view __suffix) { for (auto __c : __suffix) if (std::tolower(__input.peek()) == __c) __input.get(); } static void __matches(istream& __input, char __expected) { if (std::tolower(__input.get()) != __expected) std::__throw_runtime_error((string("corrupt tzdb: expected character '") + __expected + '\'').c_str()); } static void __matches(istream& __input, string_view __expected) { for (auto __c : __expected) if (std::tolower(__input.get()) != __c) std::__throw_runtime_error((string("corrupt tzdb: expected string '") + string(__expected) + '\'').c_str()); } [[nodiscard]] static string __parse_string(istream& __input) { string __result; while (true) { int __c = __input.get(); switch (__c) { case ' ': case '\t': case '\n': __input.unget(); [[fallthrough]]; case istream::traits_type::eof(): if (__result.empty()) std::__throw_runtime_error("corrupt tzdb: expected a string"); return __result; default: __result.push_back(__c); } } } [[nodiscard]] static int64_t __parse_integral(istream& __input, bool __leading_zero_allowed) { int64_t __result = __input.get(); if (__leading_zero_allowed) { if (__result < '0' || __result > '9') std::__throw_runtime_error("corrupt tzdb: expected a digit"); } else { if (__result < '1' || __result > '9') std::__throw_runtime_error("corrupt tzdb: expected a non-zero digit"); } __result -= '0'; while (true) { if (__input.peek() < '0' || __input.peek() > '9') return __result; // In order to avoid possible overflows we limit the accepted range. // Most values parsed are expected to be very small: // - 8784 hours in a year // - 31 days in a month // - year no real maximum, these values are expected to be less than // the range of the year type. // // However the leapseconds use a seconds after epoch value. Using an // int would run into an overflow in 2038. By using a 64-bit value // the range is large enough for the bilions of years. Limiting that // range slightly to make the code easier is not an issue. if (__result > (std::numeric_limits::max() / 16)) std::__throw_runtime_error("corrupt tzdb: integral too large"); __result *= 10; __result += __input.get() - '0'; } } //===----------------------------------------------------------------------===// // Calendar //===----------------------------------------------------------------------===// [[nodiscard]] static day __parse_day(istream& __input) { unsigned __result = chrono::__parse_integral(__input, false); if (__result > 31) std::__throw_runtime_error("corrupt tzdb day: value too large"); return day{__result}; } [[nodiscard]] static weekday __parse_weekday(istream& __input) { // TZDB allows the shortest unique name. switch (std::tolower(__input.get())) { case 'f': chrono::__skip(__input, "riday"); return Friday; case 'm': chrono::__skip(__input, "onday"); return Monday; case 's': switch (std::tolower(__input.get())) { case 'a': chrono::__skip(__input, "turday"); return Saturday; case 'u': chrono::__skip(__input, "nday"); return Sunday; } break; case 't': switch (std::tolower(__input.get())) { case 'h': chrono::__skip(__input, "ursday"); return Thursday; case 'u': chrono::__skip(__input, "esday"); return Tuesday; } break; case 'w': chrono::__skip(__input, "ednesday"); return Wednesday; } std::__throw_runtime_error("corrupt tzdb weekday: invalid name"); } [[nodiscard]] static month __parse_month(istream& __input) { // TZDB allows the shortest unique name. switch (std::tolower(__input.get())) { case 'a': switch (std::tolower(__input.get())) { case 'p': chrono::__skip(__input, "ril"); return April; case 'u': chrono::__skip(__input, "gust"); return August; } break; case 'd': chrono::__skip(__input, "ecember"); return December; case 'f': chrono::__skip(__input, "ebruary"); return February; case 'j': switch (std::tolower(__input.get())) { case 'a': chrono::__skip(__input, "nuary"); return January; case 'u': switch (std::tolower(__input.get())) { case 'n': chrono::__skip(__input, 'e'); return June; case 'l': chrono::__skip(__input, 'y'); return July; } } break; case 'm': if (std::tolower(__input.get()) == 'a') switch (std::tolower(__input.get())) { case 'y': return May; case 'r': chrono::__skip(__input, "ch"); return March; } break; case 'n': chrono::__skip(__input, "ovember"); return November; case 'o': chrono::__skip(__input, "ctober"); return October; case 's': chrono::__skip(__input, "eptember"); return September; } std::__throw_runtime_error("corrupt tzdb month: invalid name"); } [[nodiscard]] static year __parse_year_value(istream& __input) { bool __negative = __input.peek() == '-'; if (__negative) [[unlikely]] __input.get(); int64_t __result = __parse_integral(__input, true); if (__result > static_cast(year::max())) { if (__negative) std::__throw_runtime_error("corrupt tzdb year: year is less than the minimum"); std::__throw_runtime_error("corrupt tzdb year: year is greater than the maximum"); } return year{static_cast(__negative ? -__result : __result)}; } [[nodiscard]] static year __parse_year(istream& __input) { if (std::tolower(__input.peek()) != 'm') [[likely]] return chrono::__parse_year_value(__input); __input.get(); switch (std::tolower(__input.peek())) { case 'i': __input.get(); chrono::__skip(__input, 'n'); [[fallthrough]]; case ' ': // The m is minimum, even when that is ambiguous. return year::min(); case 'a': __input.get(); chrono::__skip(__input, 'x'); return year::max(); } std::__throw_runtime_error("corrupt tzdb year: expected 'min' or 'max'"); } //===----------------------------------------------------------------------===// // TZDB fields //===----------------------------------------------------------------------===// [[nodiscard]] static year __parse_to(istream& __input, year __only) { if (std::tolower(__input.peek()) != 'o') return chrono::__parse_year(__input); __input.get(); chrono::__skip(__input, "nly"); return __only; } [[nodiscard]] static __tz::__constrained_weekday::__comparison_t __parse_comparison(istream& __input) { switch (__input.get()) { case '>': chrono::__matches(__input, '='); return __tz::__constrained_weekday::__ge; case '<': chrono::__matches(__input, '='); return __tz::__constrained_weekday::__le; } std::__throw_runtime_error("corrupt tzdb on: expected '>=' or '<='"); } [[nodiscard]] static __tz::__on __parse_on(istream& __input) { if (std::isdigit(__input.peek())) return chrono::__parse_day(__input); if (std::tolower(__input.peek()) == 'l') { chrono::__matches(__input, "last"); return weekday_last(chrono::__parse_weekday(__input)); } return __tz::__constrained_weekday{ chrono::__parse_weekday(__input), chrono::__parse_comparison(__input), chrono::__parse_day(__input)}; } [[nodiscard]] static seconds __parse_duration(istream& __input) { seconds __result{0}; int __c = __input.peek(); bool __negative = __c == '-'; if (__negative) { __input.get(); // Negative is either a negative value or a single -. // The latter means 0 and the parsing is complete. if (!std::isdigit(__input.peek())) return __result; } __result += hours(__parse_integral(__input, true)); if (__input.peek() != ':') return __negative ? -__result : __result; __input.get(); __result += minutes(__parse_integral(__input, true)); if (__input.peek() != ':') return __negative ? -__result : __result; __input.get(); __result += seconds(__parse_integral(__input, true)); if (__input.peek() != '.') return __negative ? -__result : __result; __input.get(); (void)__parse_integral(__input, true); // Truncate the digits. return __negative ? -__result : __result; } [[nodiscard]] static __tz::__clock __parse_clock(istream& __input) { switch (__input.get()) { // case sensitive case 'w': return __tz::__clock::__local; case 's': return __tz::__clock::__standard; case 'u': case 'g': case 'z': return __tz::__clock::__universal; } __input.unget(); return __tz::__clock::__local; } [[nodiscard]] static bool __parse_dst(istream& __input, seconds __offset) { switch (__input.get()) { // case sensitive case 's': return false; case 'd': return true; } __input.unget(); return __offset != 0s; } [[nodiscard]] static __tz::__at __parse_at(istream& __input) { return {__parse_duration(__input), __parse_clock(__input)}; } [[nodiscard]] static __tz::__save __parse_save(istream& __input) { seconds __time = chrono::__parse_duration(__input); return {__time, chrono::__parse_dst(__input, __time)}; } [[nodiscard]] static string __parse_letters(istream& __input) { string __result = __parse_string(__input); // Canonicalize "-" to "" since they are equivalent in the specification. return __result != "-" ? __result : ""; } [[nodiscard]] static __tz::__continuation::__rules_t __parse_rules(istream& __input) { int __c = __input.peek(); // A single - is not a SAVE but a special case. if (__c == '-') { __input.get(); if (chrono::__is_whitespace(__input.peek())) return monostate{}; __input.unget(); return chrono::__parse_save(__input); } if (std::isdigit(__c) || __c == '+') return chrono::__parse_save(__input); return chrono::__parse_string(__input); } [[nodiscard]] static __tz::__continuation __parse_continuation(__tz::__rules_storage_type& __rules, istream& __input) { __tz::__continuation __result; __result.__rule_database_ = std::addressof(__rules); // Note STDOFF is specified as // This field has the same format as the AT and SAVE fields of rule lines; // These fields have different suffix letters, these letters seem // not to be used so do not allow any of them. __result.__stdoff = chrono::__parse_duration(__input); chrono::__skip_mandatory_whitespace(__input); __result.__rules = chrono::__parse_rules(__input); chrono::__skip_mandatory_whitespace(__input); __result.__format = chrono::__parse_string(__input); chrono::__skip_optional_whitespace(__input); if (chrono::__is_eol(__input.peek())) return __result; __result.__year = chrono::__parse_year(__input); chrono::__skip_optional_whitespace(__input); if (chrono::__is_eol(__input.peek())) return __result; __result.__in = chrono::__parse_month(__input); chrono::__skip_optional_whitespace(__input); if (chrono::__is_eol(__input.peek())) return __result; __result.__on = chrono::__parse_on(__input); chrono::__skip_optional_whitespace(__input); if (chrono::__is_eol(__input.peek())) return __result; __result.__at = __parse_at(__input); return __result; } //===----------------------------------------------------------------------===// // Time Zone Database entries //===----------------------------------------------------------------------===// static string __parse_version(istream& __input) { // The first line in tzdata.zi contains // # version YYYYw // The parser expects this pattern // #\s*version\s*\(.*) // This part is not documented. chrono::__matches(__input, '#'); chrono::__skip_optional_whitespace(__input); chrono::__matches(__input, "version"); chrono::__skip_mandatory_whitespace(__input); return chrono::__parse_string(__input); } [[nodiscard]] static __tz::__rule& __create_entry(__tz::__rules_storage_type& __rules, const string& __name) { auto __result = [&]() -> __tz::__rule& { auto& __rule = __rules.emplace_back(__name, vector<__tz::__rule>{}); return __rule.second.emplace_back(); }; if (__rules.empty()) return __result(); // Typically rules are in contiguous order in the database. // But there are exceptions, some rules are interleaved. if (__rules.back().first == __name) return __rules.back().second.emplace_back(); if (auto __it = ranges::find(__rules, __name, [](const auto& __r) { return __r.first; }); __it != ranges::end(__rules)) return __it->second.emplace_back(); return __result(); } static void __parse_rule(tzdb& __tzdb, __tz::__rules_storage_type& __rules, istream& __input) { chrono::__skip_mandatory_whitespace(__input); string __name = chrono::__parse_string(__input); __tz::__rule& __rule = __create_entry(__rules, __name); chrono::__skip_mandatory_whitespace(__input); __rule.__from = chrono::__parse_year(__input); chrono::__skip_mandatory_whitespace(__input); __rule.__to = chrono::__parse_to(__input, __rule.__from); chrono::__skip_mandatory_whitespace(__input); chrono::__matches(__input, '-'); chrono::__skip_mandatory_whitespace(__input); __rule.__in = chrono::__parse_month(__input); chrono::__skip_mandatory_whitespace(__input); __rule.__on = chrono::__parse_on(__input); chrono::__skip_mandatory_whitespace(__input); __rule.__at = __parse_at(__input); chrono::__skip_mandatory_whitespace(__input); __rule.__save = __parse_save(__input); chrono::__skip_mandatory_whitespace(__input); __rule.__letters = chrono::__parse_letters(__input); chrono::__skip_line(__input); } static void __parse_zone(tzdb& __tzdb, __tz::__rules_storage_type& __rules, istream& __input) { chrono::__skip_mandatory_whitespace(__input); auto __p = std::make_unique(chrono::__parse_string(__input), __rules); vector<__tz::__continuation>& __continuations = __p->__continuations(); chrono::__skip_mandatory_whitespace(__input); do { // The first line must be valid, continuations are optional. __continuations.emplace_back(__parse_continuation(__rules, __input)); chrono::__skip_line(__input); chrono::__skip_optional_whitespace(__input); } while (std::isdigit(__input.peek()) || __input.peek() == '-'); __tzdb.zones.emplace_back(time_zone::__create(std::move(__p))); } static void __parse_link(tzdb& __tzdb, istream& __input) { chrono::__skip_mandatory_whitespace(__input); string __target = chrono::__parse_string(__input); chrono::__skip_mandatory_whitespace(__input); string __name = chrono::__parse_string(__input); chrono::__skip_line(__input); __tzdb.links.emplace_back(std::__private_constructor_tag{}, std::move(__name), std::move(__target)); } static void __parse_tzdata(tzdb& __db, __tz::__rules_storage_type& __rules, istream& __input) { while (true) { int __c = std::tolower(__input.get()); switch (__c) { case istream::traits_type::eof(): return; case ' ': case '\t': case '\n': break; case '#': chrono::__skip_line(__input); break; case 'r': chrono::__skip(__input, "ule"); chrono::__parse_rule(__db, __rules, __input); break; case 'z': chrono::__skip(__input, "one"); chrono::__parse_zone(__db, __rules, __input); break; case 'l': chrono::__skip(__input, "ink"); chrono::__parse_link(__db, __input); break; default: std::__throw_runtime_error("corrupt tzdb: unexpected input"); } } } static void __parse_leap_seconds(vector& __leap_seconds, istream&& __input) { // The file stores dates since 1 January 1900, 00:00:00, we want // seconds since 1 January 1970. constexpr auto __offset = sys_days{1970y / January / 1} - sys_days{1900y / January / 1}; while (true) { switch (__input.peek()) { case istream::traits_type::eof(): return; case ' ': case '\t': case '\n': __input.get(); continue; case '#': chrono::__skip_line(__input); continue; } sys_seconds __date = sys_seconds{seconds{chrono::__parse_integral(__input, false)}} - __offset; chrono::__skip_mandatory_whitespace(__input); seconds __value{chrono::__parse_integral(__input, false)}; chrono::__skip_line(__input); __leap_seconds.emplace_back(std::__private_constructor_tag{}, __date, __value); } } void __init_tzdb(tzdb& __tzdb, __tz::__rules_storage_type& __rules) { filesystem::path __root = chrono::__libcpp_tzdb_directory(); ifstream __tzdata{__root / "tzdata.zi"}; __tzdb.version = chrono::__parse_version(__tzdata); chrono::__parse_tzdata(__tzdb, __rules, __tzdata); std::ranges::sort(__tzdb.zones); std::ranges::sort(__tzdb.links); std::ranges::sort(__rules, {}, [](const auto& p) { return p.first; }); // There are two files with the leap second information // - leapseconds as specified by zic // - leap-seconds.list the source data // The latter is much easier to parse, it seems Howard shares that // opinion. chrono::__parse_leap_seconds(__tzdb.leap_seconds, ifstream{__root / "leap-seconds.list"}); // The Standard requires the leap seconds to be sorted. The file // leap-seconds.list usually provides them in sorted order, but that is not // guaranteed so we ensure it here. std::ranges::sort(__tzdb.leap_seconds); } #ifdef _WIN32 [[nodiscard]] static const time_zone* __current_zone_windows(const tzdb& tzdb) { // TODO TZDB Implement this on Windows. std::__throw_runtime_error("unknown time zone"); } #else // ifdef _WIN32 [[nodiscard]] static const time_zone* __current_zone_posix(const tzdb& tzdb) { // On POSIX systems there are several ways to configure the time zone. // In order of priority they are: // - TZ environment variable // https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html#tag_08 // The documentation is unclear whether or not it's allowed to // change time zone information. For example the TZ string // MST7MDT // this is an entry in tzdata.zi. The value // MST // is also an entry. Is it allowed to use the following? // MST-3 // Even when this is valid there is no time_zone record in the // database. Since the library would need to return a valid pointer, // this means the library needs to allocate and leak a pointer. // // - The time zone name is the target of the symlink /etc/localtime // relative to /usr/share/zoneinfo/ // The algorithm is like this: // - If the environment variable TZ is set and points to a valid // record use this value. // - Else use the name based on the `/etc/localtime` symlink. if (const char* __tz = getenv("TZ")) if (const time_zone* __result = tzdb.__locate_zone(__tz)) return __result; filesystem::path __path = "/etc/localtime"; if (!std::filesystem::exists(__path)) std::__throw_runtime_error("tzdb: the symlink '/etc/localtime' does not exist"); if (!std::filesystem::is_symlink(__path)) std::__throw_runtime_error("tzdb: the path '/etc/localtime' is not a symlink"); filesystem::path __tz = filesystem::read_symlink(__path); // The path may be a relative path, in that case convert it to an absolute // path based on the proper initial directory. if (__tz.is_relative()) __tz = filesystem::canonical("/etc" / __tz); string __name = filesystem::relative(__tz, "/usr/share/zoneinfo/"); if (const time_zone* __result = tzdb.__locate_zone(__name)) return __result; std::__throw_runtime_error(("tzdb: the time zone '" + __name + "' is not found in the database").c_str()); } #endif // ifdef _WIN32 //===----------------------------------------------------------------------===// // Public API //===----------------------------------------------------------------------===// _LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI tzdb_list& get_tzdb_list() { static tzdb_list __result{new tzdb_list::__impl()}; return __result; } [[nodiscard]] _LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI const time_zone* tzdb::__current_zone() const { #ifdef _WIN32 return chrono::__current_zone_windows(*this); #else return chrono::__current_zone_posix(*this); #endif } _LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI const tzdb& reload_tzdb() { if (chrono::remote_version() == chrono::get_tzdb().version) return chrono::get_tzdb(); return chrono::get_tzdb_list().__implementation().__load(); } _LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI string remote_version() { filesystem::path __root = chrono::__libcpp_tzdb_directory(); ifstream __tzdata{__root / "tzdata.zi"}; return chrono::__parse_version(__tzdata); } } // namespace chrono _LIBCPP_END_NAMESPACE_STD