// std::print -*- C++ -*- // Copyright The GNU Toolchain Authors. // // This file is part of the GNU ISO C++ Library. This library is free // software; you can redistribute it and/or modify it under the // terms of the GNU General Public License as published by the // Free Software Foundation; either version 3, or (at your option) // any later version. // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // Under Section 7 of GPL version 3, you are granted additional // permissions described in the GCC Runtime Library Exception, version // 3.1, as published by the Free Software Foundation. // You should have received a copy of the GNU General Public License and // a copy of the GCC Runtime Library Exception along with this program; // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see // . #include #include #include #include #include #include // uint32_t #include #include #include #include #ifdef _WIN32 # include // _fileno # include // _get_osfhandle, _open_osfhandle, _write # include // _O_APPEND # include // GetLastError, WriteConsoleW #elifdef _GLIBCXX_HAVE_UNISTD_H # include // fileno # include // isatty #endif namespace std _GLIBCXX_VISIBILITY(default) { _GLIBCXX_BEGIN_NAMESPACE_VERSION #ifdef _WIN32 namespace { void* check_for_console(void* handle) { if (handle != nullptr && handle != INVALID_HANDLE_VALUE) { unsigned long mode; // unused if (::GetConsoleMode(handle, &mode)) return handle; } return nullptr; } } // namespace #endif // This returns intptr_t that is either a Windows HANDLE // or 1 + a POSIX file descriptor. A zero return indicates failure. void* __open_terminal([[maybe_unused]] FILE* f) { #ifndef _GLIBCXX_USE_STDIO_PURE if (f) { #ifdef _WIN32 if (int fd = ::_fileno(f); fd >= 0) return check_for_console((void*)_get_osfhandle(fd)); #elif defined _GLIBCXX_HAVE_UNISTD_H && ! defined __AVR__ if (int fd = (::fileno)(f); fd >= 0 && ::isatty(fd)) return f; #endif } #endif return nullptr; } void* __open_terminal([[maybe_unused]] std::streambuf* sb) { #if ! defined _GLIBCXX_USE_STDIO_PURE && defined __cpp_rtti using namespace __gnu_cxx; if (auto fb = dynamic_cast*>(sb)) return __open_terminal(fb->file()); if (auto fb = dynamic_cast*>(sb)) return __open_terminal(fb->file()); #ifdef __glibcxx_fstream_native_handle #ifdef _WIN32 if (auto fb = dynamic_cast(sb)) return check_for_console(fb->native_handle()); #elif defined _GLIBCXX_HAVE_UNISTD_H && ! defined __AVR__ if (auto fb = dynamic_cast(sb)) if (int fd = fb->native_handle(); fd >= 0 && ::isatty(fd)) return ::fdopen(::dup(fd), "w"); // Caller must call fclose. #endif #endif #endif // ! _GLIBCXX_USE_STDIO_PURE return nullptr; } namespace { // Validate UTF-8 string, replacing invalid sequences with U+FFFD. // // Return true if the input is valid UTF-8, false otherwise. // // If sizeof(_CharT) > 1, then transcode a valid string into out, // using either UTF-16 or UTF-32 as determined by sizeof(_CharT). // // If sizeof(_CharT) == 1 and the input is valid UTF-8, both s and out will // be unchanged. Otherwise, each invalid sequence in s will be overwritten // with a single 0xFF byte followed by zero or more 0xFE bytes, and then // a valid UTF-8 string will be produced in out (replacing invalid // sequences with U+FFFD). template bool to_valid_unicode(span s, basic_string<_CharT>& out) { constexpr bool transcode = sizeof(_CharT) > 1; unsigned seen = 0, needed = 0; unsigned char lo_bound = 0x80, hi_bound = 0xBF; size_t errors = 0; [[maybe_unused]] uint32_t code_point{}; if constexpr (transcode) { out.clear(); // XXX: count code points in s instead of bytes? out.reserve(s.size()); } auto q = s.data(), eoq = q + s.size(); while (q != eoq) { unsigned char byte = *q; if (needed == 0) { if (byte <= 0x7F) [[likely]] // 0x00 to 0x7F { if constexpr (transcode) out.push_back(_CharT(byte)); // Fast forward to the next non-ASCII character. while (++q != eoq && (unsigned char)*q <= 0x7F) { if constexpr (transcode) out.push_back(*q); } continue; } else if (byte < 0xC2) [[unlikely]] { if constexpr (transcode) out.push_back(0xFFFD); else *q = 0xFF; ++errors; } else if (byte <= 0xDF) // 0xC2 to 0xDF { needed = 1; if constexpr (transcode) code_point = byte & 0x1F; } else if (byte <= 0xEF) // 0xE0 to 0xEF { if (byte == 0xE0) lo_bound = 0xA0; else if (byte == 0xED) hi_bound = 0x9F; needed = 2; if constexpr (transcode) code_point = byte & 0x0F; } else if (byte <= 0xF4) // 0xF0 to 0xF4 { if (byte == 0xF0) lo_bound = 0x90; else if (byte == 0xF4) hi_bound = 0x8F; needed = 3; if constexpr (transcode) code_point = byte & 0x07; } else [[unlikely]] { if constexpr (transcode) out.push_back(0xFFFD); else *q = 0xFF; ++errors; } } else { if (byte < lo_bound || byte > hi_bound) [[unlikely]] { if constexpr (transcode) out.push_back(0xFFFD); else { *(q - seen - 1) = 0xFF; __builtin_memset(q - seen, 0xFE, seen); } ++errors; needed = seen = 0; lo_bound = 0x80; hi_bound = 0xBF; continue; // Reprocess the current character. } if constexpr (transcode) code_point = (code_point << 6) | (byte & 0x3f); lo_bound = 0x80; hi_bound = 0xBF; ++seen; if (seen == needed) [[likely]] { if constexpr (transcode) { if (code_point <= __gnu_cxx::__int_traits<_CharT>::__max) out.push_back(code_point); else { // Algorithm from // http://www.unicode.org/faq/utf_bom.html#utf16-4 const char32_t LEAD_OFFSET = 0xD800 - (0x10000 >> 10); char16_t lead = LEAD_OFFSET + (code_point >> 10); char16_t trail = 0xDC00 + (code_point & 0x3FF); out.push_back(lead); out.push_back(trail); } } needed = seen = 0; } } ++q; } if (needed) [[unlikely]] { // The string ends with an incomplete multibyte sequence. if constexpr (transcode) out.push_back(0xFFFD); else { // Truncate the incomplete sequence to a single byte. if (seen) s = s.first(s.size() - seen); s.back() = 0xFF; } ++errors; } if (errors == 0) [[likely]] return true; else if constexpr (!transcode) { out.reserve(s.size() + errors * 2); for (unsigned char byte : s) { if (byte < 0xFE) [[likely]] out += (char)byte; else if (byte == 0xFF) out += "\xef\xbf\xbd"; // U+FFFD in UTF-8 } } return false; } // Validate UTF-8 string. // Returns true if s is valid UTF-8, otherwise returns false and stores // a valid UTF-8 string in err. [[__gnu__::__always_inline__]] inline bool to_valid_utf8(span s, string& err) { return to_valid_unicode(s, err); } // Transcode UTF-8 string to UTF-16. // Returns true if s is valid UTF-8, otherwise returns false. // In either case, a valid UTF-16 string is stored in u16. [[__gnu__::__always_inline__]] inline bool to_valid_utf16(span s, u16string& u16) { return to_valid_unicode(s, u16); } } // namespace // Write a UTF-8 string to a file descriptor/handle. // Ill-formed sequences in the string will be substituted with U+FFFD. error_code __write_to_terminal(void* term, span str) { if (term == nullptr) [[unlikely]] return std::make_error_code(std::errc::invalid_argument); error_code ec; #ifdef _WIN32 // We could use std::wstring here instead of std::u16string. In general // char_traits is more optimized than char_traits but // for the purposes of to_valid_unicode only char_traits::copy matters, // and char_traits::copy uses memcpy so is OK. u16string wstr; if (!to_valid_utf16(str, wstr)) ec = std::make_error_code(errc::illegal_byte_sequence); // This allows us to test this function with a normal file, // see testsuite/27_io/print/2.cc if (!check_for_console(term)) { int fd = _open_osfhandle((intptr_t)term, _O_APPEND); if (_write(fd, wstr.data(), wstr.size() * 2) == -1) ec = {errno, generic_category()}; return ec; } unsigned long nchars = 0; WriteConsoleW(term, wstr.data(), wstr.size(), &nchars, nullptr); if (nchars != wstr.size()) return {(int)GetLastError(), system_category()}; #elifdef _GLIBCXX_HAVE_UNISTD_H string out; if (!to_valid_utf8(str, out)) { str = out; ec = std::make_error_code(errc::illegal_byte_sequence); } auto n = std::fwrite(str.data(), 1, str.size(), (FILE*)term); if (n != str.size()) ec = std::make_error_code(errc::io_error); #else ec = std::make_error_code(std::errc::function_not_supported); #endif return ec; } _GLIBCXX_END_NAMESPACE_VERSION } // namespace std