diff options
author | Patrick Palka <ppalka@redhat.com> | 2021-03-11 17:02:27 -0500 |
---|---|---|
committer | Patrick Palka <ppalka@redhat.com> | 2021-03-11 17:02:27 -0500 |
commit | c4f8e568aa66a8461ee39d5f85c2e2d41a833b7f (patch) | |
tree | ad21899bd308e7cab9bcf28c7514370a16309350 | |
parent | 349adff208618c7d18d8ab12552a02509aacef47 (diff) | |
download | gcc-c4f8e568aa66a8461ee39d5f85c2e2d41a833b7f.zip gcc-c4f8e568aa66a8461ee39d5f85c2e2d41a833b7f.tar.gz gcc-c4f8e568aa66a8461ee39d5f85c2e2d41a833b7f.tar.bz2 |
libstdc++: Add a fallback 128-bit integer class type and use it
This implements a minimal integer class type that emulates 128-bit
unsigned arithmetic using a pair of 64-bit integers, which the
floating-point std::to_chars implementation then uses as a drop-in
replacement for unsigned __int128 on targets that lack the latter.
After this patch, we now fully support formatting of large long double
types on such targets.
Since Ryu performs 128-bit division only by 5 and 10, and 128-bit
modulus only by 2, 5 and 10, this integer class type supports only
these divisors rather than general division/modulus.
libstdc++-v3/ChangeLog:
* src/c++17/floating_to_chars.cc: Simplify the file as if
__SIZEOF_INT128__ is always defined.
[!defined __SIZEOF_INT128__]: Include "uint128_t.h". Define
a base-10 to_chars overload for the uint128_t class type.
* src/c++17/uint128_t.h: New file.
* testsuite/20_util/to_chars/long_double.cc: No longer expect an
execution FAIL on targets that have a large long double type
but lack __int128.
-rw-r--r-- | libstdc++-v3/src/c++17/floating_to_chars.cc | 58 | ||||
-rw-r--r-- | libstdc++-v3/src/c++17/uint128_t.h | 297 | ||||
-rw-r--r-- | libstdc++-v3/testsuite/20_util/to_chars/long_double.cc | 1 |
3 files changed, 332 insertions, 24 deletions
diff --git a/libstdc++-v3/src/c++17/floating_to_chars.cc b/libstdc++-v3/src/c++17/floating_to_chars.cc index da3fbaa..1a0abb9 100644 --- a/libstdc++-v3/src/c++17/floating_to_chars.cc +++ b/libstdc++-v3/src/c++17/floating_to_chars.cc @@ -64,25 +64,19 @@ extern "C" int __sprintfieee128(char*, const char*, ...); #if __LDBL_MANT_DIG__ == __DBL_MANT_DIG__ # define LONG_DOUBLE_KIND LDK_BINARY64 -#elif defined(__SIZEOF_INT128__) -// The Ryu routines need a 128-bit integer type in order to do shortest -// formatting of types larger than 64-bit double, so without __int128 we can't -// support any large long double format. This is the case for e.g. i386. -# if __LDBL_MANT_DIG__ == 64 +#elif __LDBL_MANT_DIG__ == 64 # define LONG_DOUBLE_KIND LDK_FLOAT80 -# elif __LDBL_MANT_DIG__ == 113 -# define LONG_DOUBLE_KIND LDK_BINARY128 -# elif __LDBL_MANT_DIG__ == 106 -# define LONG_DOUBLE_KIND LDK_IBM128 -# endif -# if defined _GLIBCXX_USE_FLOAT128 && __FLT128_MANT_DIG__ == 113 -// Define overloads of std::to_chars for __float128. -# define FLOAT128_TO_CHARS 1 -# endif +#elif __LDBL_MANT_DIG__ == 113 +# define LONG_DOUBLE_KIND LDK_BINARY128 +#elif __LDBL_MANT_DIG__ == 106 +# define LONG_DOUBLE_KIND LDK_IBM128 +#else +# define LONG_DOUBLE_KIND LDK_UNSUPPORTED #endif -#if !defined(LONG_DOUBLE_KIND) -# define LONG_DOUBLE_KIND LDK_UNSUPPORTED +#if defined _GLIBCXX_USE_FLOAT128 && __FLT128_MANT_DIG__ == 113 +// Define overloads of std::to_chars for __float128. +# define FLOAT128_TO_CHARS 1 #endif // For now we only support __float128 when it's the powerpc64 __ieee128 type. @@ -100,6 +94,8 @@ namespace { #if defined __SIZEOF_INT128__ using uint128_t = unsigned __int128; +#else +# include "uint128_t.h" #endif namespace ryu @@ -114,7 +110,6 @@ namespace #include "ryu/d2fixed.c" #include "ryu/f2s.c" -#ifdef __SIZEOF_INT128__ namespace generic128 { // Put the generic Ryu bits in their own namespace to avoid name conflicts. 
@@ -129,7 +124,6 @@ namespace int to_chars(const floating_decimal_128 v, char* const result) { return generic128::generic_to_chars(v, result); } -#endif } // namespace ryu // A traits class that contains pertinent information about the binary @@ -407,10 +401,8 @@ namespace return uint32_t{}; else if constexpr (total_bits <= 64) return uint64_t{}; -#ifdef __SIZEOF_INT128__ else if constexpr (total_bits <= 128) return uint128_t{}; -#endif }; using uint_t = decltype(get_uint_t()); uint_t value_bits = 0; @@ -503,7 +495,6 @@ namespace return ryu::floating_to_fd32(value); else if constexpr (std::is_same_v<T, double>) return ryu::floating_to_fd64(value); -#ifdef __SIZEOF_INT128__ else if constexpr (std::is_same_v<T, long double> || std::is_same_v<T, F128_type>) { @@ -519,7 +510,6 @@ namespace mantissa_bits, exponent_bits, !has_implicit_leading_bit); } -#endif } // This subroutine returns true if the shortest scientific form fd is a @@ -558,10 +548,32 @@ namespace get_mantissa_length(const ryu::floating_decimal_64 fd) { return ryu::decimalLength17(fd.mantissa); } -#ifdef __SIZEOF_INT128__ int get_mantissa_length(const ryu::floating_decimal_128 fd) { return ryu::generic128::decimalLength(fd.mantissa); } + +#if !defined __SIZEOF_INT128__ + // An implementation of base-10 std::to_chars for the uint128_t class type, + // used by targets that lack __int128. 
+ std::to_chars_result + to_chars(char* first, char* const last, uint128_t x) + { + const int len = ryu::generic128::decimalLength(x); + if (last - first < len) + return {last, std::errc::value_too_large}; + if (x == 0) + { + *first++ = '0'; + return {first, std::errc{}}; + } + for (int i = 0; i < len; ++i) + { + first[len - 1 - i] = '0' + static_cast<char>(x % 10); + x /= 10; + } + __glibcxx_assert(x == 0); + return {first + len, std::errc{}}; + } #endif } // anon namespace diff --git a/libstdc++-v3/src/c++17/uint128_t.h b/libstdc++-v3/src/c++17/uint128_t.h new file mode 100644 index 0000000..e3c2f19 --- /dev/null +++ b/libstdc++-v3/src/c++17/uint128_t.h @@ -0,0 +1,297 @@ +// A relatively minimal unsigned 128-bit integer class type, used by the +// floating-point std::to_chars implementation on targets that lack __int128. + +// Copyright (C) 2021 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. 
+ +struct uint128_t +{ +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + uint64_t lo, hi; +#else + uint64_t hi, lo; +#endif + + uint128_t() = default; + + constexpr + uint128_t(uint64_t lo, uint64_t hi = 0) +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + : lo(lo), hi(hi) +#else + : hi(hi), lo(lo) +#endif + { } + + constexpr explicit + operator bool() const + { return *this != 0; } + + template<typename T, typename = std::enable_if_t<std::is_integral_v<T>>> + constexpr explicit + operator T() const + { + static_assert(sizeof(T) <= sizeof(uint64_t)); + return static_cast<T>(lo); + } + + friend constexpr uint128_t + operator&(uint128_t x, const uint128_t y) + { + x.lo &= y.lo; + x.hi &= y.hi; + return x; + } + + friend constexpr uint128_t + operator|(uint128_t x, const uint128_t y) + { + x.lo |= y.lo; + x.hi |= y.hi; + return x; + } + + friend constexpr uint128_t + operator<<(uint128_t x, const uint128_t y) + { + __glibcxx_assert(y < 128); + // TODO: Convince GCC to use shldq on x86 here. + if (y.lo >= 64) + { + x.hi = x.lo << (y.lo - 64); + x.lo = 0; + } + else if (y.lo != 0) + { + x.hi <<= y.lo; + x.hi |= x.lo >> (64 - y.lo); + x.lo <<= y.lo; + } + return x; + } + + friend constexpr uint128_t + operator>>(uint128_t x, const uint128_t y) + { + __glibcxx_assert(y < 128); + // TODO: Convince GCC to use shrdq on x86 here. 
+ if (y.lo >= 64) + { + x.lo = x.hi >> (y.lo - 64); + x.hi = 0; + } + else if (y.lo != 0) + { + x.lo >>= y.lo; + x.lo |= x.hi << (64 - y.lo); + x.hi >>= y.lo; + } + return x; + } + + constexpr uint128_t + operator~() const + { return {~lo, ~hi}; } + + constexpr uint128_t + operator-() const + { return operator~() + 1; } + + friend constexpr uint128_t + operator+(uint128_t x, const uint128_t y) + { + x.hi += __builtin_add_overflow(x.lo, y.lo, &x.lo); + x.hi += y.hi; + return x; + } + + friend constexpr uint128_t + operator-(uint128_t x, const uint128_t y) + { + x.hi -= __builtin_sub_overflow(x.lo, y.lo, &x.lo); + x.hi -= y.hi; + return x; + } + + static constexpr uint128_t + umul64_64_128(const uint64_t x, const uint64_t y) + { + const uint64_t xl = x & 0xffffffff; + const uint64_t xh = x >> 32; + const uint64_t yl = y & 0xffffffff; + const uint64_t yh = y >> 32; + const uint64_t ll = xl * yl; + const uint64_t lh = xl * yh; + const uint64_t hl = xh * yl; + const uint64_t hh = xh * yh; + const uint64_t m = (ll >> 32) + lh + (hl & 0xffffffff); + const uint64_t l = (ll & 0xffffffff ) | (m << 32); + const uint64_t h = (m >> 32) + (hl >> 32) + hh; + return {l, h}; + } + + friend constexpr uint128_t + operator*(const uint128_t x, const uint128_t y) + { + uint128_t z = umul64_64_128(x.lo, y.lo); + z.hi += x.lo * y.hi + x.hi * y.lo; + return z; + } + + friend constexpr uint128_t + operator/(const uint128_t x, const uint128_t y) + { + // Ryu performs 128-bit division only by 5 and 10, so that's what we + // implement. The strategy here is to relate division of x with that of + // x.hi and x.lo separately. + __glibcxx_assert(y == 5 || y == 10); + // The following implements division by 5 and 10. 
In either case, we + // first compute division by 5: + // x/5 = (x.hi*2^64 + x.lo)/5 + // = (x.hi*(2^64-1) + x.hi + x.lo)/5 + // = x.hi*((2^64-1)/5) + (x.hi + x.lo)/5 since CST=(2^64-1)/5 is exact + // = x.hi*CST + x.hi/5 + x.lo/5 + ((x.lo%5) + (x.hi%5) >= 5) + // We go a step further and replace the last adjustment term with a + // lookup table, which we encode as a binary literal. This seems to + // yield smaller code on x86 at least. + constexpr auto cst = ~uint64_t(0) / 5; + uint128_t q = uint128_t{x.hi}*cst + uint128_t{x.hi/5 + x.lo/5}; + constexpr auto lookup = 0b111100000u; + q += (lookup >> ((x.hi % 5) + (x.lo % 5))) & 1; + if (y == 10) + q >>= 1; + return q; + } + + friend constexpr uint128_t + operator%(const uint128_t x, const uint128_t y) + { + // Ryu performs 128-bit modulus only by 2, 5 and 10, so that's what we + // implement. The strategy here is to relate modulus of x with that of + // x.hi and x.lo separately. + if (y == 2) + return x & 1; + __glibcxx_assert(y == 5 || y == 10); + // The following implements modulus by 5 and 10. In either case, + // we first compute modulus by 5: + // x (mod 5) = x.hi*2^64 + x.lo (mod 5) + // = x.hi + x.lo (mod 5) since 2^64 ≡ 1 (mod 5) + // So the straightforward implementation would be + // ((x.hi % 5) + (x.lo % 5)) % 5 + // But we go a step further and replace the outermost % with a + // lookup table: + // = {0,1,2,3,4,0,1,2,3}[(x.hi % 5) + (x.lo % 5)] (mod 5) + // which we encode as an octal literal. + constexpr auto lookup = 0321043210u; + auto r = (lookup >> 3*((x.hi % 5) + (x.lo % 5))) & 7; + if (y == 10) + // x % 10 = (x % 5) if x / 5 is even + // (x % 5) + 5 if x / 5 is odd + // The compiler should be able to CSE the below computation of x/5 and + // the above modulus operations with a nearby inlined computation of x/10. 
+ r += 5 * ((x/5).lo & 1); + return r; + } + + friend constexpr bool + operator==(const uint128_t x, const uint128_t y) + { return x.hi == y.hi && x.lo == y.lo; } + + friend constexpr bool + operator<(const uint128_t x, const uint128_t y) + { return x.hi < y.hi || (x.hi == y.hi && x.lo < y.lo); } + + friend constexpr auto + __bit_width(const uint128_t x) + { + if (auto w = std::__bit_width(x.hi)) + return w + 64; + else + return std::__bit_width(x.lo); + } + + friend constexpr auto + __countr_zero(const uint128_t x) + { + auto c = std::__countr_zero(x.lo); + if (c == 64) + return 64 + std::__countr_zero(x.hi); + else + return c; + } + + constexpr uint128_t& + operator--() + { return *this -= 1; } + + constexpr uint128_t& + operator++() + { return *this += 1; } + + constexpr uint128_t& + operator+=(const uint128_t y) + { return *this = *this + y; } + + constexpr uint128_t& + operator-=(const uint128_t y) + { return *this = *this - y; } + + constexpr uint128_t& + operator*=(const uint128_t y) + { return *this = *this * y; } + + constexpr uint128_t& + operator<<=(const uint128_t y) + { return *this = *this << y; } + + constexpr uint128_t& + operator>>=(const uint128_t y) + { return *this = *this >> y; } + + constexpr uint128_t& + operator|=(const uint128_t y) + { return *this = *this | y; } + + constexpr uint128_t& + operator&=(const uint128_t y) + { return *this = *this & y; } + + constexpr uint128_t& + operator%=(const uint128_t y) + { return *this = *this % y; } + + constexpr uint128_t& + operator/=(const uint128_t y) + { return *this = *this / y; } + + friend constexpr bool + operator!=(const uint128_t x, const uint128_t y) + { return !(x == y); } + + friend constexpr bool + operator>(const uint128_t x, const uint128_t y) + { return y < x; } + + friend constexpr bool + operator>=(const uint128_t x, const uint128_t y) + { return !(x < y); } +}; diff --git a/libstdc++-v3/testsuite/20_util/to_chars/long_double.cc 
b/libstdc++-v3/testsuite/20_util/to_chars/long_double.cc index da847ae..5c1f713 100644 --- a/libstdc++-v3/testsuite/20_util/to_chars/long_double.cc +++ b/libstdc++-v3/testsuite/20_util/to_chars/long_double.cc @@ -18,7 +18,6 @@ // <charconv> is supported in C++14 as a GNU extension, but this test uses C++17 // hexadecimal floating-point literals. // { dg-do run { target c++17 } } -// { dg-xfail-run-if "Ryu needs __int128" { large_long_double && { ! int128 } } } // { dg-require-effective-target ieee-floats } #include <charconv> |