diff options
author | Patrick Palka <ppalka@redhat.com> | 2022-04-21 12:11:01 -0400 |
---|---|---|
committer | Patrick Palka <ppalka@redhat.com> | 2022-04-21 12:11:01 -0400 |
commit | 93dd7f36f2066ec52137178ee52052f293e5e743 (patch) | |
tree | 391af3af100b83dfca4c6b38fdb1d3d1f39befda /libstdc++-v3/src | |
parent | 605a80bb733b225e5e53db0f0298374213cdcd95 (diff) | |
download | gcc-93dd7f36f2066ec52137178ee52052f293e5e743.zip gcc-93dd7f36f2066ec52137178ee52052f293e5e743.tar.gz gcc-93dd7f36f2066ec52137178ee52052f293e5e743.tar.bz2 |
libstdc++: Avoid ASCII assumptions in floating_from_chars.cc
In starts_with_ci and in __floating_from_chars_hex's inf/nan handling,
we were assuming that the letters are contiguous and that 'A' + 32 == 'a',
which is true for ASCII but not for other character encodings.
This patch fixes starts_with_ci by using a constexpr lookup table that
maps uppercase letters to lowercase, and fixes __floating_from_chars_hex
by using __from_chars_alnum_to_val.
libstdc++-v3/ChangeLog:
* include/std/charconv (__from_chars_alnum_to_val_table):
Simplify initialization of __lower/__upper_letters.
(__from_chars_alnum_to_val): Default the template parameter to
false.
* src/c++17/floating_from_chars.cc (starts_with_ci): Don't
assume the uppercase and lowercase letters are contiguous.
(__floating_from_chars_hex): Likewise.
Diffstat (limited to 'libstdc++-v3/src')
-rw-r--r-- | libstdc++-v3/src/c++17/floating_from_chars.cc | 33 |
1 files changed, 25 insertions, 8 deletions
diff --git a/libstdc++-v3/src/c++17/floating_from_chars.cc b/libstdc++-v3/src/c++17/floating_from_chars.cc index 0f5183a..13de1e3 100644 --- a/libstdc++-v3/src/c++17/floating_from_chars.cc +++ b/libstdc++-v3/src/c++17/floating_from_chars.cc @@ -30,6 +30,7 @@ // Prefer to use std::pmr::string if possible, which requires the cxx11 ABI. #define _GLIBCXX_USE_CXX11_ABI 1 +#include <array> #include <charconv> #include <bit> #include <string> @@ -451,15 +452,33 @@ namespace #if _GLIBCXX_FLOAT_IS_IEEE_BINARY32 && _GLIBCXX_DOUBLE_IS_IEEE_BINARY64 // Return true iff [FIRST,LAST) begins with PREFIX, ignoring case. + // PREFIX is assumed to not contain any uppercase letters. bool starts_with_ci(const char* first, const char* last, string_view prefix) { __glibcxx_requires_valid_range(first, last); - for (char ch : prefix) + // A lookup table that maps uppercase letters to lowercase and + // is otherwise the identity mapping. + static constexpr auto upper_to_lower_table = [] { + constexpr unsigned char lower_letters[27] = "abcdefghijklmnopqrstuvwxyz"; + constexpr unsigned char upper_letters[27] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + std::array<unsigned char, (1u << __CHAR_BIT__)> table = {}; + for (unsigned i = 0; i < table.size(); ++i) + table[i] = i; + for (unsigned i = 0; i < 26; ++i) + table[upper_letters[i]] = lower_letters[i]; + return table; + }(); + + if (last - first < static_cast<ptrdiff_t>(prefix.length())) + return false; + + for (const unsigned char pch : prefix) { - __glibcxx_assert(ch >= 'a' && ch <= 'z'); - if (first == last || (*first != ch && *first != ch - 32)) + // __glibcxx_assert(pch == upper_to_lower_table[pch]); + const unsigned char ch = *first; + if (ch != pch && upper_to_lower_table[ch] != pch) return false; ++first; } @@ -535,10 +554,8 @@ namespace ++first; break; } - else if ((ch >= '0' && ch <= '9') - || (ch >= 'a' && ch <= 'z') - || (ch >= 'A' && ch <= 'Z') - || ch == '_') + else if (ch == '_' + || __detail::__from_chars_alnum_to_val(ch) < 127) 
continue; else { @@ -599,7 +616,7 @@ namespace continue; } - int hexit = __detail::__from_chars_alnum_to_val<false>(ch); + int hexit = __detail::__from_chars_alnum_to_val(ch); if (hexit >= 16) break; seen_hexit = true; |