//===-- Utilities to convert integral values to string ----------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // Converts an integer to a string. // // By default, the string is written as decimal to an internal buffer and // accessed via the 'view' method. // // IntegerToString buffer(42); // cpp::string_view view = buffer.view(); // // The buffer is allocated on the stack and its size is so that the conversion // always succeeds. // // It is also possible to write the data to a preallocated buffer, but this may // fail. // // char buffer[8]; // if (auto maybe_view = IntegerToString::write_to_span(buffer, 42)) { // cpp::string_view view = *maybe_view; // } // // The first template parameter is the type of the integer. // The second template parameter defines how the integer is formatted. // Available default are 'radix::Bin', 'radix::Oct', 'radix::Dec' and // 'radix::Hex'. // // For 'radix::Bin', 'radix::Oct' and 'radix::Hex' the value is always // interpreted as a positive type but 'radix::Dec' will honor negative values. // e.g., // // IntegerToString(-1) // "-1" // IntegerToString(-1) // "-1" // IntegerToString(-1) // "11111111" // IntegerToString(-1) // "377" // IntegerToString(-1) // "ff" // // Additionnally, the format can be changed by navigating the subtypes: // - WithPrefix : Adds "0b", "0", "0x" for binary, octal and hexadecimal // - WithWidth : Pad string to XX characters filling leading digits with 0 // - Uppercase : Use uppercase letters (only for HexString) // - WithSign : Prepend '+' for positive values (only for DecString) // // Examples // -------- // IntegerToString::WithSign>(0) : "+00" // IntegerToString::WithSign>(-1) : "-01" // IntegerToString(255) : "0xFF" // IntegerToString::Uppercase>(255) : "00FF" //===----------------------------------------------------------------------===// #ifndef LLVM_LIBC_SRC___SUPPORT_INTEGER_TO_STRING_H #define LLVM_LIBC_SRC___SUPPORT_INTEGER_TO_STRING_H #include "hdr/stdint_proxy.h" #include "src/__support/CPP/algorithm.h" // max #include "src/__support/CPP/array.h" #include "src/__support/CPP/bit.h" #include "src/__support/CPP/limits.h" #include "src/__support/CPP/optional.h" #include "src/__support/CPP/span.h" #include "src/__support/CPP/string_view.h" #include "src/__support/CPP/type_traits.h" #include "src/__support/big_int.h" // make_integral_or_big_int_unsigned_t #include "src/__support/common.h" #include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" namespace LIBC_NAMESPACE_DECL { namespace details { template struct Fmt { static constexpr uint8_t BASE = base; static constexpr size_t MIN_DIGITS = min_digits; static constexpr bool IS_UPPERCASE = is_uppercase; static constexpr bool PREFIX = prefix; static constexpr char FORCE_SIGN = force_sign; using WithPrefix = Fmt; using WithSign = Fmt; using Uppercase = Fmt; template using WithWidth = Fmt; // Invariants static constexpr uint8_t NUMERICAL_DIGITS = 10; static constexpr uint8_t ALPHA_DIGITS = 26; static constexpr uint8_t MAX_DIGIT = NUMERICAL_DIGITS + ALPHA_DIGITS; static_assert(BASE > 1 && BASE <= MAX_DIGIT); static_assert(!IS_UPPERCASE || BASE > 10, "Uppercase is only for radix > 10"); static_assert(!FORCE_SIGN || BASE == 10, "WithSign is only for radix == 10"); static_assert(!PREFIX || (BASE == 2 || BASE == 8 || BASE == 16), "WithPrefix is only for radix == 2, 8 or 16"); }; // Move this to a separate header since it might be useful elsewhere. template class StringBufferWriterImpl { cpp::span buffer; size_t index = 0; bool out_of_range = false; LIBC_INLINE size_t location() const { return forward ? index : buffer.size() - 1 - index; } public: StringBufferWriterImpl(const StringBufferWriterImpl &) = delete; StringBufferWriterImpl(cpp::span buffer) : buffer(buffer) {} LIBC_INLINE size_t size() const { return index; } LIBC_INLINE size_t remainder_size() const { return buffer.size() - size(); } LIBC_INLINE bool empty() const { return size() == 0; } LIBC_INLINE bool full() const { return size() == buffer.size(); } LIBC_INLINE bool ok() const { return !out_of_range; } LIBC_INLINE StringBufferWriterImpl &push(char c) { if (ok()) { if (!full()) { buffer[location()] = c; ++index; } else { out_of_range = true; } } return *this; } LIBC_INLINE cpp::span remainder_span() const { return forward ? buffer.last(remainder_size()) : buffer.first(remainder_size()); } LIBC_INLINE cpp::span buffer_span() const { return forward ? buffer.first(size()) : buffer.last(size()); } LIBC_INLINE cpp::string_view buffer_view() const { const auto s = buffer_span(); return {s.data(), s.size()}; } }; using StringBufferWriter = StringBufferWriterImpl; using BackwardStringBufferWriter = StringBufferWriterImpl; } // namespace details namespace radix { using Bin = details::Fmt<2>; using Oct = details::Fmt<8>; using Dec = details::Fmt<10>; using Hex = details::Fmt<16>; template using Custom = details::Fmt; } // namespace radix // Extract the low-order decimal digit from a value of integer type T. The // returned value is the digit itself, from 0 to 9. The input value is passed // by reference, and modified by dividing by 10, so that iterating this // function extracts all the digits of the original number one at a time from // low to high. template LIBC_INLINE cpp::enable_if_t, uint8_t> extract_decimal_digit(T &value) { const uint8_t digit(static_cast(value % 10)); // For built-in integer types, we assume that an adequately fast division is // available. If hardware division isn't implemented, then with a divisor // known at compile time the compiler might be able to generate an optimized // sequence instead. value /= 10; return digit; } // A specialization of extract_decimal_digit for the BigInt type in big_int.h, // avoiding the use of general-purpose BigInt division which is very slow. template LIBC_INLINE cpp::enable_if_t, uint8_t> extract_decimal_digit(T &value) { // There are two essential ways you can turn n into (n/10,n%10). One is // ordinary integer division. The other is a modular-arithmetic approach in // which you first compute n%10 by bit twiddling, then subtract it off to get // a value that is definitely a multiple of 10. Then you divide that by 10 in // two steps: shift right to divide off a factor of 2, and then divide off a // factor of 5 by multiplying by the modular inverse of 5 mod 2^BITS. (That // last step only works if you know there's no remainder, which is why you // had to subtract off the output digit first.) // // Either approach can be made to work in linear time. This code uses the // modular-arithmetic technique, because the other approach either does a lot // of integer divisions (requiring a fast hardware divider), or else uses a // "multiply by an approximation to the reciprocal" technique which depends // on careful error analysis which might go wrong in an untested edge case. using Word = typename T::word_type; // Find the remainder (value % 10). We do this by breaking up the input // integer into chunks of size WORD_SIZE/2, so that the sum of them doesn't // overflow a Word. Then we sum all the half-words times 6, except the bottom // one, which is added to that sum without scaling. // // Why 6? Because you can imagine that the original number had the form // // halfwords[0] + K*halfwords[1] + K^2*halfwords[2] + ... // // where K = 2^(WORD_SIZE/2). Since WORD_SIZE is expected to be a multiple of // 8, that makes WORD_SIZE/2 a multiple of 4, so that K is a power of 16. And // all powers of 16 (larger than 1) are congruent to 6 mod 10, by induction: // 16 itself is, and 6^2=36 is also congruent to 6. Word acc_remainder = 0; constexpr Word HALFWORD_BITS = T::WORD_SIZE / 2; constexpr Word HALFWORD_MASK = ((Word(1) << HALFWORD_BITS) - 1); // Sum both halves of all words except the low one. for (size_t i = 1; i < T::WORD_COUNT; i++) { acc_remainder += value.val[i] >> HALFWORD_BITS; acc_remainder += value.val[i] & HALFWORD_MASK; } // Add the high half of the low word. Then we have everything that needs to // be multiplied by 6, so do that. acc_remainder += value.val[0] >> HALFWORD_BITS; acc_remainder *= 6; // Having multiplied it by 6, add the lowest half-word, and then reduce mod // 10 by normal integer division to finish. acc_remainder += value.val[0] & HALFWORD_MASK; uint8_t digit = static_cast(acc_remainder % 10); // Now we have the output digit. Subtract it from the input value, and shift // right to divide by 2. value -= digit; value >>= 1; // Now all that's left is to multiply by the inverse of 5 mod 2^BITS. No // matter what the value of BITS, the inverse of 5 has the very convenient // form 0xCCCC...CCCD, with as many C hex digits in the middle as necessary. // // We could construct a second BigInt with all words 0xCCCCCCCCCCCCCCCC, // increment the bottom word, and call a general-purpose multiply function. // But we can do better, by taking advantage of the regularity: we can do // this particular operation in linear time, whereas a general multiplier // would take superlinear time (quadratic in small cases). // // To begin with, instead of computing n*0xCCCC...CCCD, we'll compute // n*0xCCCC...CCCC and then add it to the original n. Then all the words of // the multiplier have the same value 0xCCCCCCCCCCCCCCCC, which I'll just // denote as C. If we also write t = 2^WORD_SIZE, and imagine (as an example) // that the input number has three words x,y,z with x being the low word, // then we're computing // // (x + y t + z t^2) * (C + C t + C t^2) // // = x C + y C t + z C t^2 // + x C t + y C t^2 + z C t^3 // + x C t^2 + y C t^3 + z C t^4 // // but we're working mod t^3, so the high-order terms vanish and this becomes // // x C + y C t + z C t^2 // + x C t + y C t^2 // + x C t^2 // // = x C + (x+y) C t + (x+y+z) C t^2 // // So all you have to do is to work from the low word of the integer upwards, // accumulating C times the sum of all the words you've seen so far to get // x*C, (x+y)*C, (x+y+z)*C and so on. In each step you add another product to // the accumulator, and add the accumulator to the corresponding word of the // original number (so that we end up with value*CCCD, not just value*CCCC). // // If you do that literally, then your accumulator has to be three words // wide, because the sum of words can overflow into a second word, and // multiplying by C adds another word. But we can do slightly better by // breaking each product word*C up into a bottom half and a top half. If we // write x*C = xl + xh*t, and similarly for y and z, then our sum becomes // // (xl + xh t) + (yl + yh t) t + (zl + zh t) t^2 // + (xl + xh t) t + (yl + yh t) t^2 // + (xl + xh t) t^2 // // and if you expand out again, collect terms, and discard t^3 terms, you get // // (xl) // + (xl + xh + yl) t // + (xl + xh + yl + yh + zl) t^2 // // in which each coefficient is the sum of all the low words of the products // up to _and including_ the current word, plus all the high words up to but // _not_ including the current word. So now you only have to retain two words // of sum instead of three. // // We do this entire procedure in a single in-place pass over the input // number, reading each word to make its product with C and then adding the // low word of the accumulator to it. constexpr Word C = Word(-1) / 5 * 4; // calculate 0xCCCC as 4/5 of 0xFFFF Word acc_lo = 0, acc_hi = 0; // accumulator of all the half-products so far Word carry_bit, carry_word = 0; for (size_t i = 0; i < T::WORD_COUNT; i++) { // Make the two-word product of C with the current input word. multiword::DoubleWide product = multiword::mul2(C, value.val[i]); // Add the low half of the product to our accumulator, but not yet the high // half. acc_lo = add_with_carry(acc_lo, product[0], 0, carry_bit); acc_hi += carry_bit; // Now the accumulator contains exactly the value we need to add to the // current input word. Add it, plus any carries from lower words, and make // a new word of carry data to propagate into the next iteration. value.val[i] = add_with_carry(value.val[i], carry_word, 0, carry_bit); carry_word = acc_hi + carry_bit; value.val[i] = add_with_carry(value.val[i], acc_lo, 0, carry_bit); carry_word += carry_bit; // Now add the high half of the current product to our accumulator. acc_lo = add_with_carry(acc_lo, product[1], 0, carry_bit); acc_hi += carry_bit; } return digit; } // See file header for documentation. template class IntegerToString { static_assert(cpp::is_integral_v || is_big_int_v); LIBC_INLINE static constexpr size_t compute_buffer_size() { constexpr auto MAX_DIGITS = []() -> size_t { // We size the string buffer for base 10 using an approximation algorithm: // // size = ceil(sizeof(T) * 5 / 2) // // If sizeof(T) is 1, then size is 3 (actually need 3) // If sizeof(T) is 2, then size is 5 (actually need 5) // If sizeof(T) is 4, then size is 10 (actually need 10) // If sizeof(T) is 8, then size is 20 (actually need 20) // If sizeof(T) is 16, then size is 40 (actually need 39) // // NOTE: The ceil operation is actually implemented as // floor(((sizeof(T) * 5) + 1) / 2) // where floor operation is just integer division. // // This estimation grows slightly faster than the actual value, but the // overhead is small enough to tolerate. if constexpr (Fmt::BASE == 10) return ((sizeof(T) * 5) + 1) / 2; // For other bases, we approximate by rounding down to the nearest power // of two base, since the space needed is easy to calculate and it won't // overestimate by too much. constexpr auto FLOOR_LOG_2 = [](size_t num) -> size_t { size_t i = 0; for (; num > 1; num /= 2) ++i; return i; }; constexpr size_t BITS_PER_DIGIT = FLOOR_LOG_2(Fmt::BASE); return ((sizeof(T) * 8 + (BITS_PER_DIGIT - 1)) / BITS_PER_DIGIT); }; constexpr size_t DIGIT_SIZE = cpp::max(MAX_DIGITS(), Fmt::MIN_DIGITS); constexpr size_t SIGN_SIZE = Fmt::BASE == 10 ? 1 : 0; constexpr size_t PREFIX_SIZE = Fmt::PREFIX ? 2 : 0; return DIGIT_SIZE + SIGN_SIZE + PREFIX_SIZE; } static constexpr size_t BUFFER_SIZE = compute_buffer_size(); static_assert(BUFFER_SIZE > 0); // An internal stateless structure that handles the number formatting logic. struct IntegerWriter { static_assert(cpp::is_integral_v || is_big_int_v); using UNSIGNED_T = make_integral_or_big_int_unsigned_t; LIBC_INLINE static char digit_char(uint8_t digit) { const char result = internal::int_to_b36_char(digit); return Fmt::IS_UPPERCASE ? internal::toupper(result) : result; } LIBC_INLINE static void write_unsigned_number(UNSIGNED_T value, details::BackwardStringBufferWriter &sink) { for (; sink.ok() && value != 0; value /= Fmt::BASE) { const uint8_t digit(static_cast(value % Fmt::BASE)); sink.push(digit_char(digit)); } } LIBC_INLINE static void write_unsigned_number_dec(UNSIGNED_T value, details::BackwardStringBufferWriter &sink) { while (sink.ok() && value != 0) { const uint8_t digit = extract_decimal_digit(value); sink.push(digit_char(digit)); } } // Returns the absolute value of 'value' as 'UNSIGNED_T'. LIBC_INLINE static UNSIGNED_T abs(T value) { if (cpp::is_unsigned_v || value >= 0) return static_cast(value); // already of the right sign. // Signed integers are asymmetric (e.g., int8_t ∈ [-128, 127]). // Thus negating the type's minimum value would overflow. // From C++20 on, signed types are guaranteed to be represented as 2's // complement. We take advantage of this representation and negate the // value by using the exact same bit representation, e.g., // binary : 0b1000'0000 // int8_t : -128 // uint8_t: 128 // Note: the compiler can completely optimize out the two branches and // replace them by a simple negate instruction. // https://godbolt.org/z/hE7zahT9W if (value == cpp::numeric_limits::min()) { return cpp::bit_cast(value); } else { return static_cast( -value); // legal and representable both as T and UNSIGNED_T.` } } LIBC_INLINE static void write(T value, details::BackwardStringBufferWriter &sink) { if constexpr (Fmt::BASE == 10) { write_unsigned_number_dec(abs(value), sink); } else { write_unsigned_number(static_cast(value), sink); } // width while (sink.ok() && sink.size() < Fmt::MIN_DIGITS) sink.push('0'); // sign if constexpr (Fmt::BASE == 10) { if (value < 0) sink.push('-'); else if (Fmt::FORCE_SIGN) sink.push('+'); } // prefix if constexpr (Fmt::PREFIX) { if constexpr (Fmt::BASE == 2) { sink.push('b'); sink.push('0'); } if constexpr (Fmt::BASE == 16) { sink.push('x'); sink.push('0'); } if constexpr (Fmt::BASE == 8) { const cpp::string_view written = sink.buffer_view(); if (written.empty() || written.front() != '0') sink.push('0'); } } } }; cpp::array array; size_t written = 0; public: IntegerToString(const IntegerToString &) = delete; IntegerToString(T value) { details::BackwardStringBufferWriter writer(array); IntegerWriter::write(value, writer); written = writer.size(); } [[nodiscard]] LIBC_INLINE static cpp::optional format_to(cpp::span buffer, T value) { details::BackwardStringBufferWriter writer(buffer); IntegerWriter::write(value, writer); if (writer.ok()) return cpp::string_view(buffer.data() + buffer.size() - writer.size(), writer.size()); return cpp::nullopt; } LIBC_INLINE static constexpr size_t buffer_size() { return BUFFER_SIZE; } LIBC_INLINE size_t size() const { return written; } LIBC_INLINE cpp::string_view view() && = delete; LIBC_INLINE cpp::string_view view() const & { return cpp::string_view(array.data() + array.size() - size(), size()); } }; } // namespace LIBC_NAMESPACE_DECL #endif // LLVM_LIBC_SRC___SUPPORT_INTEGER_TO_STRING_H