diff options
author | Jonathan Wakely <jwakely@redhat.com> | 2023-12-14 23:23:34 +0000 |
---|---|---|
committer | Jonathan Wakely <jwakely@redhat.com> | 2023-12-14 23:59:24 +0000 |
commit | fe54b57728c09ab0389e2bb3f079d5210566199d (patch) | |
tree | 1007a2d3e0e8d60906e2a21ae483ef36928f17c1 /libstdc++-v3/src | |
parent | 29ad35a1db645f6027acc4f2a9b15363f402ca97 (diff) | |
download | gcc-fe54b57728c09ab0389e2bb3f079d5210566199d.zip gcc-fe54b57728c09ab0389e2bb3f079d5210566199d.tar.gz gcc-fe54b57728c09ab0389e2bb3f079d5210566199d.tar.bz2 |
libstdc++: Implement C++23 <print> header [PR107760]
This adds the C++23 std::print functions, which use std::format to write
to a FILE stream or std::ostream (defaulting to stdout).
The new extern symbols are in the libstdc++exp.a archive, so we aren't
committing to stable symbols in the DSO yet. There's a UTF-8 validating
and transcoding function added by this change. That can certainly be
optimized, but it's internal to libstdc++exp.a so can be tweaked later
at leisure.
Currently the external symbols work for all targets, but are only
actually used for Windows, where it's necessary to transcode to UTF-16
to write to the console. The standard seems to encourage us to also
diagnose invalid UTF-8 for non-Windows targets when writing to a
terminal (and only when writing to a terminal), but I'm reliably
informed that that wasn't the intent of the wording. Checking for
invalid UTF-8 sequences only needs to happen for Windows, which is good
as checking for a terminal requires a call to isatty, and on Linux that
uses an ioctl syscall, which would make std::print ten times slower!
Testing the std::print behaviour is difficult if it depends on whether
the output stream is connected to a Windows console or not, as we can't
(as far as I know) do that non-interactively in DejaGNU. One of the new
tests uses the internal __write_to_terminal function directly. That
allows us to verify its UTF-8 error handling on POSIX targets, even
though that's not actually used by std::print. For Windows, that
__write_to_terminal function transcodes to UTF-16 but then uses
WriteConsoleW which fails unless it really is writing to the console.
That means the 27_io/print/2.cc test FAILs on Windows. The UTF-16
transcoding has been manually tested using mingw-w64 and Wine, and
appears to work.
libstdc++-v3/ChangeLog:
PR libstdc++/107760
* include/Makefile.am: Add new header.
* include/Makefile.in: Regenerate.
* include/bits/version.def (__cpp_lib_print): Define.
* include/bits/version.h: Regenerate.
* include/std/format (__literal_encoding_is_utf8): New function.
(_Seq_sink::view()): New member function.
* include/std/ostream (vprintf_nonunicode, vprintf_unicode)
(print, println): New functions.
* include/std/print: New file.
* src/c++23/Makefile.am: Add new source file.
* src/c++23/Makefile.in: Regenerate.
* src/c++23/print.cc: New file.
* testsuite/27_io/basic_ostream/print/1.cc: New test.
* testsuite/27_io/print/1.cc: New test.
* testsuite/27_io/print/2.cc: New test.
Diffstat (limited to 'libstdc++-v3/src')
-rw-r--r-- | libstdc++-v3/src/c++23/Makefile.am | 8 | ||||
-rw-r--r-- | libstdc++-v3/src/c++23/Makefile.in | 10 | ||||
-rw-r--r-- | libstdc++-v3/src/c++23/print.cc | 348 |
3 files changed, 363 insertions, 3 deletions
diff --git a/libstdc++-v3/src/c++23/Makefile.am b/libstdc++-v3/src/c++23/Makefile.am index da988c3..7693875 100644 --- a/libstdc++-v3/src/c++23/Makefile.am +++ b/libstdc++-v3/src/c++23/Makefile.am @@ -35,7 +35,7 @@ else inst_sources = endif -sources = stacktrace.cc +sources = stacktrace.cc print.cc vpath % $(top_srcdir)/src/c++23 @@ -46,6 +46,12 @@ else libc__23convenience_la_SOURCES = endif +# Use C++26 so that std::filebuf::native_handle() is available. +print.lo: print.cc + $(LTCXXCOMPILE) -std=gnu++26 -c $< +print.o: print.cc + $(CXXCOMPILE) -std=gnu++26 -c $< + # AM_CXXFLAGS needs to be in each subdirectory so that it can be # modified in a per-library or per-sub-library way. Need to manually # set this option because CONFIG_CXXFLAGS has to be after diff --git a/libstdc++-v3/src/c++23/Makefile.in b/libstdc++-v3/src/c++23/Makefile.in index 1121749..ce60968 100644 --- a/libstdc++-v3/src/c++23/Makefile.in +++ b/libstdc++-v3/src/c++23/Makefile.in @@ -121,7 +121,7 @@ CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = LTLIBRARIES = $(noinst_LTLIBRARIES) libc__23convenience_la_LIBADD = -am__objects_1 = stacktrace.lo +am__objects_1 = stacktrace.lo print.lo am__objects_2 = @GLIBCXX_HOSTED_TRUE@am_libc__23convenience_la_OBJECTS = \ @GLIBCXX_HOSTED_TRUE@ $(am__objects_1) $(am__objects_2) @@ -430,7 +430,7 @@ headers = # XTEMPLATE_FLAGS = -fno-implicit-templates @ENABLE_EXTERN_TEMPLATE_TRUE@inst_sources = -sources = stacktrace.cc +sources = stacktrace.cc print.cc @GLIBCXX_HOSTED_FALSE@libc__23convenience_la_SOURCES = @GLIBCXX_HOSTED_TRUE@libc__23convenience_la_SOURCES = $(sources) $(inst_sources) @@ -742,6 +742,12 @@ uninstall-am: vpath % $(top_srcdir)/src/c++23 +# Use C++26 so that std::filebuf::native_handle() is available. +print.lo: print.cc + $(LTCXXCOMPILE) -std=gnu++26 -c $< +print.o: print.cc + $(CXXCOMPILE) -std=gnu++26 -c $< + # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: diff --git a/libstdc++-v3/src/c++23/print.cc b/libstdc++-v3/src/c++23/print.cc new file mode 100644 index 0000000..2fe7a2e --- /dev/null +++ b/libstdc++-v3/src/c++23/print.cc @@ -0,0 +1,348 @@ +// std::print -*- C++ -*- + +// Copyright The GNU Toolchain Authors. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +#include <span> +#include <string> +#include <streambuf> +#include <system_error> +#include <cstdio> +#include <cstdint> // uint32_t +#include <fstream> +#include <ext/stdio_filebuf.h> +#include <ext/stdio_sync_filebuf.h> +#include <ext/numeric_traits.h> + +#ifdef _WIN32 +# include <stdio.h> // _fileno +# include <io.h> // _get_osfhandle +# include <windows.h> // GetLastError, WriteConsoleW +#elifdef _GLIBCXX_HAVE_UNISTD_H +# include <stdio.h> // fileno +# include <unistd.h> // isatty +#endif + +namespace std _GLIBCXX_VISIBILITY(default) +{ +_GLIBCXX_BEGIN_NAMESPACE_VERSION + +#ifdef _WIN32 +namespace +{ + void* + check_for_console(void* handle) + { + if (handle != nullptr && handle != INVALID_HANDLE_VALUE) + { + unsigned long mode; // unused + if (::GetConsoleMode(handle, &mode)) + return handle; + } + return nullptr; + } +} // namespace +#endif + + // This returns intptr_t that is either a Windows HANDLE + // or 1 + a POSIX file descriptor. A zero return indicates failure. + void* + __open_terminal(FILE* f) + { +#ifndef _GLIBCXX_USE_STDIO_PURE + if (f) + { +#ifdef _WIN32 + if (int fd = ::_fileno(f); fd >= 0) + return check_for_console((void*)_get_osfhandle(fd)); +#elifdef _GLIBCXX_HAVE_UNISTD_H + if (int fd = ::fileno(f); fd >= 0 && ::isatty(fd)) + return f; +#endif + } +#endif + return nullptr; + } + + void* + __open_terminal(std::streambuf* sb) + { +#ifndef _GLIBCXX_USE_STDIO_PURE + using namespace __gnu_cxx; + + if (auto fb = dynamic_cast<stdio_sync_filebuf<char>*>(sb)) + return __open_terminal(fb->file()); + + if (auto fb = dynamic_cast<stdio_filebuf<char>*>(sb)) + return __open_terminal(fb->file()); + +#ifdef __glibcxx_fstream_native_handle +#ifdef _WIN32 + if (auto fb = dynamic_cast<filebuf*>(sb)) + return check_for_console(fb->native_handle()); +#elifdef _GLIBCXX_HAVE_UNISTD_H + if (auto fb = dynamic_cast<filebuf*>(sb)) + if (int fd = fb->native_handle(); fd >= 0 && ::isatty(fd)) + return ::fdopen(::dup(fd), "w"); // Caller must call fclose. +#endif +#endif +#endif // ! _GLIBCXX_USE_STDIO_PURE + + return nullptr; + } + +namespace +{ + // Validate UTF-8 string, replacing invalid sequences with U+FFFD. + // + // Return true if the input is valid UTF-8, false otherwise. + // + // If sizeof(_CharT) > 1, then transcode a valid string into out, + // using either UTF-16 or UTF-32 as determined by sizeof(_CharT). + // + // If sizeof(_CharT) == 1 and the input is valid UTF-8, both s and out will + // be unchanged. Otherwise, each invalid sequence in s will be overwritten + // with a single 0xFF byte followed by zero or more 0xFE bytes, and then + // a valid UTF-8 string will be produced in out (replacing invalid + // sequences with U+FFFD). + template<typename _CharT> + bool + to_valid_unicode(span<char> s, basic_string<_CharT>& out) + { + constexpr bool transcode = sizeof(_CharT) > 1; + + unsigned seen = 0, needed = 0; + unsigned char lo_bound = 0x80, hi_bound = 0xBF; + size_t errors = 0; + + [[maybe_unused]] uint32_t code_point{}; + if constexpr (transcode) + { + out.clear(); + // XXX: count code points in s instead of bytes? + out.reserve(s.size()); + } + + auto q = s.data(), eoq = q + s.size(); + while (q != eoq) + { + unsigned char byte = *q; + if (needed == 0) + { + if (byte <= 0x7F) [[likely]] // 0x00 to 0x7F + { + if constexpr (transcode) + out.push_back(_CharT(byte)); + + // Fast forward to the next non-ASCII character. + while (++q != eoq && (unsigned char)*q <= 0x7F) + { + if constexpr (transcode) + out.push_back(*q); + } + continue; + } + else if (byte < 0xC2) + { + if constexpr (transcode) + out.push_back(0xFFFD); + else + *q = 0xFF; + ++errors; + } + else if (byte <= 0xDF) // 0xC2 to 0xDF + { + needed = 1; + if constexpr (transcode) + code_point = byte & 0x1F; + } + else if (byte <= 0xEF) // 0xE0 to 0xEF + { + if (byte == 0xE0) + lo_bound = 0xA0; + else if (byte == 0xED) + hi_bound = 0x9F; + + needed = 2; + if constexpr (transcode) + code_point = byte & 0x0F; + } + else if (byte <= 0xF4) // 0xF0 to 0xF4 + { + if (byte == 0xF0) + lo_bound = 0x90; + else if (byte == 0xF4) + hi_bound = 0x8F; + + needed = 3; + if constexpr (transcode) + code_point = byte & 0x07; + } + else [[unlikely]] + { + if constexpr (transcode) + out.push_back(0xFFFD); + else + *q = 0xFF; + ++errors; + } + } + else + { + if (byte < lo_bound || byte > hi_bound) [[unlikely]] + { + if constexpr (transcode) + out.push_back(0xFFFD); + else + { + *(q - seen - 1) = 0xFF; + __builtin_memset(q - seen, 0xFE, seen); + } + ++errors; + needed = seen = 0; + lo_bound = 0x80; + hi_bound = 0xBF; + continue; // Reprocess the current character. + } + + if constexpr (transcode) + code_point = (code_point << 6) | (byte & 0x3f); + + lo_bound = 0x80; + hi_bound = 0xBF; + ++seen; + if (seen == needed) [[likely]] + { + if constexpr (transcode) + { + if (code_point <= __gnu_cxx::__int_traits<_CharT>::__max) + out.push_back(code_point); + else + { + // Algorithm from + // http://www.unicode.org/faq/utf_bom.html#utf16-4 + const char32_t LEAD_OFFSET = 0xD800 - (0x10000 >> 10); + char16_t lead = LEAD_OFFSET + (code_point >> 10); + char16_t trail = 0xDC00 + (code_point & 0x3FF); + out.push_back(lead); + out.push_back(trail); + } + } + needed = seen = 0; + } + } + ++q; + } + + if (needed) [[unlikely]] + { + // The string ends with an incomplete multibyte sequence. + if constexpr (transcode) + out.push_back(0xFFFD); + else + { + // Truncate the incomplete sequence to a single byte. + if (seen) + s = s.first(s.size() - seen); + s.back() = 0xFF; + } + ++errors; + } + + if (errors == 0) [[likely]] + return true; + else if constexpr (!transcode) + { + out.reserve(s.size() + errors * 2); + for (unsigned char byte : s) + { + if (byte < 0xFE) [[likely]] + out += (char)byte; + else if (byte == 0xFF) + out += "\xef\xbf\xbd"; // U+FFFD in UTF-8 + } + } + return false; + } + + // Validate UTF-8 string. + // Returns true if s is valid UTF-8, otherwise returns false and stores + // a valid UTF-8 string in err. + [[__gnu__::__always_inline__]] + inline bool + to_valid_utf8(span<char> s, string& err) + { + return to_valid_unicode(s, err); + } + + // Transcode UTF-8 string to UTF-16. + // Returns true if s is valid UTF-8, otherwise returns false. + // In either case, a valid UTF-16 string is stored in u16. + [[__gnu__::__always_inline__]] + inline bool + to_valid_utf16(span<char> s, u16string& u16) + { + return to_valid_unicode(s, u16); + } +} // namespace + + // Write a UTF-8 string to a file descriptor/handle. + // Ill-formed sequences in the string will be substituted with U+FFFD. + error_code + __write_to_terminal(void* term, span<char> str) + { + if (term == nullptr) [[unlikely]] + return std::make_error_code(std::errc::invalid_argument); + + error_code ec; + +#ifdef _WIN32 + // We could use std::wstring here instead of std::u16string. In general + // char_traits<wchar_t> is more optimized than char_traits<char16_t> but + // for the purposes of to_valid_unicode only char_traits::copy matters, + // and char_traits<char16_t>::copy uses memcpy so is OK. + u16string wstr; + if (!to_valid_utf16(str, wstr)) + ec = std::make_error_code(errc::illegal_byte_sequence); + + unsigned long nchars = 0; + WriteConsoleW(term, wstr.data(), wstr.size(), &nchars, nullptr); + if (nchars != wstr.size()) + return {(int)GetLastError(), system_category()}; +#elifdef _GLIBCXX_HAVE_UNISTD_H + string out; + if (!to_valid_utf8(str, out)) + { + str = out; + ec = std::make_error_code(errc::illegal_byte_sequence); + } + + auto n = std::fwrite(str.data(), 1, str.size(), (FILE*)term); + if (n != str.size()) + ec = std::make_error_code(errc::io_error); +#else + ec = std::make_error_code(std::errc::function_not_supported); +#endif + return ec; + } +_GLIBCXX_END_NAMESPACE_VERSION +} // namespace std |