aboutsummaryrefslogtreecommitdiff
path: root/libstdc++-v3/src
diff options
context:
space:
mode:
authorJonathan Wakely <jwakely@redhat.com>2023-12-14 23:23:34 +0000
committerJonathan Wakely <jwakely@redhat.com>2023-12-14 23:59:24 +0000
commitfe54b57728c09ab0389e2bb3f079d5210566199d (patch)
tree1007a2d3e0e8d60906e2a21ae483ef36928f17c1 /libstdc++-v3/src
parent29ad35a1db645f6027acc4f2a9b15363f402ca97 (diff)
downloadgcc-fe54b57728c09ab0389e2bb3f079d5210566199d.zip
gcc-fe54b57728c09ab0389e2bb3f079d5210566199d.tar.gz
gcc-fe54b57728c09ab0389e2bb3f079d5210566199d.tar.bz2
libstdc++: Implement C++23 <print> header [PR107760]
This adds the C++23 std::print functions, which use std::format to write to a FILE stream or std::ostream (defaulting to stdout). The new extern symbols are in the libstdc++exp.a archive, so we aren't committing to stable symbols in the DSO yet. There's a UTF-8 validating and transcoding function added by this change. That can certainly be optimized, but it's internal to libstdc++exp.a so can be tweaked later at leisure. Currently the external symbols work for all targets, but are only actually used for Windows, where it's necessary to transcode to UTF-16 to write to the console. The standard seems to encourage us to also diagnose invalid UTF-8 for non-Windows targets when writing to a terminal (and only when writing to a terminal), but I'm reliably informed that that wasn't the intent of the wording. Checking for invalid UTF-8 sequences only needs to happen for Windows, which is good as checking for a terminal requires a call to isatty, and on Linux that uses an ioctl syscall, which would make std::print ten times slower! Testing the std::print behaviour is difficult if it depends on whether the output stream is connected to a Windows console or not, as we can't (as far as I know) do that non-interactively in DejaGNU. One of the new tests uses the internal __write_to_terminal function directly. That allows us to verify its UTF-8 error handling on POSIX targets, even though that's not actually used by std::print. For Windows, that __write_to_terminal function transcodes to UTF-16 but then uses WriteConsoleW which fails unless it really is writing to the console. That means the 27_io/print/2.cc test FAILs on Windows. The UTF-16 transcoding has been manually tested using mingw-w64 and Wine, and appears to work. libstdc++-v3/ChangeLog: PR libstdc++/107760 * include/Makefile.am: Add new header. * include/Makefile.in: Regenerate. * include/bits/version.def (__cpp_lib_print): Define. * include/bits/version.h: Regenerate. * include/std/format (__literal_encoding_is_utf8): New function. (_Seq_sink::view()): New member function. * include/std/ostream (vprintf_nonunicode, vprintf_unicode) (print, println): New functions. * include/std/print: New file. * src/c++23/Makefile.am: Add new source file. * src/c++23/Makefile.in: Regenerate. * src/c++23/print.cc: New file. * testsuite/27_io/basic_ostream/print/1.cc: New test. * testsuite/27_io/print/1.cc: New test. * testsuite/27_io/print/2.cc: New test.
Diffstat (limited to 'libstdc++-v3/src')
-rw-r--r--libstdc++-v3/src/c++23/Makefile.am8
-rw-r--r--libstdc++-v3/src/c++23/Makefile.in10
-rw-r--r--libstdc++-v3/src/c++23/print.cc348
3 files changed, 363 insertions, 3 deletions
diff --git a/libstdc++-v3/src/c++23/Makefile.am b/libstdc++-v3/src/c++23/Makefile.am
index da988c3..7693875 100644
--- a/libstdc++-v3/src/c++23/Makefile.am
+++ b/libstdc++-v3/src/c++23/Makefile.am
@@ -35,7 +35,7 @@ else
inst_sources =
endif
-sources = stacktrace.cc
+sources = stacktrace.cc print.cc
vpath % $(top_srcdir)/src/c++23
@@ -46,6 +46,12 @@ else
libc__23convenience_la_SOURCES =
endif
+# Use C++26 so that std::filebuf::native_handle() is available.
+print.lo: print.cc
+ $(LTCXXCOMPILE) -std=gnu++26 -c $<
+print.o: print.cc
+ $(CXXCOMPILE) -std=gnu++26 -c $<
+
# AM_CXXFLAGS needs to be in each subdirectory so that it can be
# modified in a per-library or per-sub-library way. Need to manually
# set this option because CONFIG_CXXFLAGS has to be after
diff --git a/libstdc++-v3/src/c++23/Makefile.in b/libstdc++-v3/src/c++23/Makefile.in
index 1121749..ce60968 100644
--- a/libstdc++-v3/src/c++23/Makefile.in
+++ b/libstdc++-v3/src/c++23/Makefile.in
@@ -121,7 +121,7 @@ CONFIG_CLEAN_FILES =
CONFIG_CLEAN_VPATH_FILES =
LTLIBRARIES = $(noinst_LTLIBRARIES)
libc__23convenience_la_LIBADD =
-am__objects_1 = stacktrace.lo
+am__objects_1 = stacktrace.lo print.lo
am__objects_2 =
@GLIBCXX_HOSTED_TRUE@am_libc__23convenience_la_OBJECTS = \
@GLIBCXX_HOSTED_TRUE@ $(am__objects_1) $(am__objects_2)
@@ -430,7 +430,7 @@ headers =
# XTEMPLATE_FLAGS = -fno-implicit-templates
@ENABLE_EXTERN_TEMPLATE_TRUE@inst_sources =
-sources = stacktrace.cc
+sources = stacktrace.cc print.cc
@GLIBCXX_HOSTED_FALSE@libc__23convenience_la_SOURCES =
@GLIBCXX_HOSTED_TRUE@libc__23convenience_la_SOURCES = $(sources) $(inst_sources)
@@ -742,6 +742,12 @@ uninstall-am:
vpath % $(top_srcdir)/src/c++23
+# Use C++26 so that std::filebuf::native_handle() is available.
+print.lo: print.cc
+ $(LTCXXCOMPILE) -std=gnu++26 -c $<
+print.o: print.cc
+ $(CXXCOMPILE) -std=gnu++26 -c $<
+
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:
diff --git a/libstdc++-v3/src/c++23/print.cc b/libstdc++-v3/src/c++23/print.cc
new file mode 100644
index 0000000..2fe7a2e
--- /dev/null
+++ b/libstdc++-v3/src/c++23/print.cc
@@ -0,0 +1,348 @@
+// std::print -*- C++ -*-
+
+// Copyright The GNU Toolchain Authors.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
+
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+// <http://www.gnu.org/licenses/>.
+
+#include <span>
+#include <string>
+#include <streambuf>
+#include <system_error>
+#include <cstdio>
+#include <cstdint> // uint32_t
+#include <fstream>
+#include <ext/stdio_filebuf.h>
+#include <ext/stdio_sync_filebuf.h>
+#include <ext/numeric_traits.h>
+
+#ifdef _WIN32
+# include <stdio.h> // _fileno
+# include <io.h> // _get_osfhandle
+# include <windows.h> // GetLastError, WriteConsoleW
+#elifdef _GLIBCXX_HAVE_UNISTD_H
+# include <stdio.h> // fileno
+# include <unistd.h> // isatty
+#endif
+
+namespace std _GLIBCXX_VISIBILITY(default)
+{
+_GLIBCXX_BEGIN_NAMESPACE_VERSION
+
+#ifdef _WIN32
+namespace
+{
+ void*
+ check_for_console(void* handle)
+ {
+ if (handle != nullptr && handle != INVALID_HANDLE_VALUE)
+ {
+ unsigned long mode; // unused
+ if (::GetConsoleMode(handle, &mode))
+ return handle;
+ }
+ return nullptr;
+ }
+} // namespace
+#endif
+
+ // This returns intptr_t that is either a Windows HANDLE
+ // or 1 + a POSIX file descriptor. A zero return indicates failure.
+ void*
+ __open_terminal(FILE* f)
+ {
+#ifndef _GLIBCXX_USE_STDIO_PURE
+ if (f)
+ {
+#ifdef _WIN32
+ if (int fd = ::_fileno(f); fd >= 0)
+ return check_for_console((void*)_get_osfhandle(fd));
+#elifdef _GLIBCXX_HAVE_UNISTD_H
+ if (int fd = ::fileno(f); fd >= 0 && ::isatty(fd))
+ return f;
+#endif
+ }
+#endif
+ return nullptr;
+ }
+
+ void*
+ __open_terminal(std::streambuf* sb)
+ {
+#ifndef _GLIBCXX_USE_STDIO_PURE
+ using namespace __gnu_cxx;
+
+ if (auto fb = dynamic_cast<stdio_sync_filebuf<char>*>(sb))
+ return __open_terminal(fb->file());
+
+ if (auto fb = dynamic_cast<stdio_filebuf<char>*>(sb))
+ return __open_terminal(fb->file());
+
+#ifdef __glibcxx_fstream_native_handle
+#ifdef _WIN32
+ if (auto fb = dynamic_cast<filebuf*>(sb))
+ return check_for_console(fb->native_handle());
+#elifdef _GLIBCXX_HAVE_UNISTD_H
+ if (auto fb = dynamic_cast<filebuf*>(sb))
+ if (int fd = fb->native_handle(); fd >= 0 && ::isatty(fd))
+ return ::fdopen(::dup(fd), "w"); // Caller must call fclose.
+#endif
+#endif
+#endif // ! _GLIBCXX_USE_STDIO_PURE
+
+ return nullptr;
+ }
+
+namespace
+{
+ // Validate UTF-8 string, replacing invalid sequences with U+FFFD.
+ //
+ // Return true if the input is valid UTF-8, false otherwise.
+ //
+ // If sizeof(_CharT) > 1, then transcode a valid string into out,
+ // using either UTF-16 or UTF-32 as determined by sizeof(_CharT).
+ //
+ // If sizeof(_CharT) == 1 and the input is valid UTF-8, both s and out will
+ // be unchanged. Otherwise, each invalid sequence in s will be overwritten
+ // with a single 0xFF byte followed by zero or more 0xFE bytes, and then
+ // a valid UTF-8 string will be produced in out (replacing invalid
+ // sequences with U+FFFD).
+ template<typename _CharT>
+ bool
+ to_valid_unicode(span<char> s, basic_string<_CharT>& out)
+ {
+ constexpr bool transcode = sizeof(_CharT) > 1;
+
+ unsigned seen = 0, needed = 0;
+ unsigned char lo_bound = 0x80, hi_bound = 0xBF;
+ size_t errors = 0;
+
+ [[maybe_unused]] uint32_t code_point{};
+ if constexpr (transcode)
+ {
+ out.clear();
+ // XXX: count code points in s instead of bytes?
+ out.reserve(s.size());
+ }
+
+ auto q = s.data(), eoq = q + s.size();
+ while (q != eoq)
+ {
+ unsigned char byte = *q;
+ if (needed == 0)
+ {
+ if (byte <= 0x7F) [[likely]] // 0x00 to 0x7F
+ {
+ if constexpr (transcode)
+ out.push_back(_CharT(byte));
+
+ // Fast forward to the next non-ASCII character.
+ while (++q != eoq && (unsigned char)*q <= 0x7F)
+ {
+ if constexpr (transcode)
+ out.push_back(*q);
+ }
+ continue;
+ }
+ else if (byte < 0xC2)
+ {
+ if constexpr (transcode)
+ out.push_back(0xFFFD);
+ else
+ *q = 0xFF;
+ ++errors;
+ }
+ else if (byte <= 0xDF) // 0xC2 to 0xDF
+ {
+ needed = 1;
+ if constexpr (transcode)
+ code_point = byte & 0x1F;
+ }
+ else if (byte <= 0xEF) // 0xE0 to 0xEF
+ {
+ if (byte == 0xE0)
+ lo_bound = 0xA0;
+ else if (byte == 0xED)
+ hi_bound = 0x9F;
+
+ needed = 2;
+ if constexpr (transcode)
+ code_point = byte & 0x0F;
+ }
+ else if (byte <= 0xF4) // 0xF0 to 0xF4
+ {
+ if (byte == 0xF0)
+ lo_bound = 0x90;
+ else if (byte == 0xF4)
+ hi_bound = 0x8F;
+
+ needed = 3;
+ if constexpr (transcode)
+ code_point = byte & 0x07;
+ }
+ else [[unlikely]]
+ {
+ if constexpr (transcode)
+ out.push_back(0xFFFD);
+ else
+ *q = 0xFF;
+ ++errors;
+ }
+ }
+ else
+ {
+ if (byte < lo_bound || byte > hi_bound) [[unlikely]]
+ {
+ if constexpr (transcode)
+ out.push_back(0xFFFD);
+ else
+ {
+ *(q - seen - 1) = 0xFF;
+ __builtin_memset(q - seen, 0xFE, seen);
+ }
+ ++errors;
+ needed = seen = 0;
+ lo_bound = 0x80;
+ hi_bound = 0xBF;
+ continue; // Reprocess the current character.
+ }
+
+ if constexpr (transcode)
+ code_point = (code_point << 6) | (byte & 0x3f);
+
+ lo_bound = 0x80;
+ hi_bound = 0xBF;
+ ++seen;
+ if (seen == needed) [[likely]]
+ {
+ if constexpr (transcode)
+ {
+ if (code_point <= __gnu_cxx::__int_traits<_CharT>::__max)
+ out.push_back(code_point);
+ else
+ {
+ // Algorithm from
+ // http://www.unicode.org/faq/utf_bom.html#utf16-4
+ const char32_t LEAD_OFFSET = 0xD800 - (0x10000 >> 10);
+ char16_t lead = LEAD_OFFSET + (code_point >> 10);
+ char16_t trail = 0xDC00 + (code_point & 0x3FF);
+ out.push_back(lead);
+ out.push_back(trail);
+ }
+ }
+ needed = seen = 0;
+ }
+ }
+ ++q;
+ }
+
+ if (needed) [[unlikely]]
+ {
+ // The string ends with an incomplete multibyte sequence.
+ if constexpr (transcode)
+ out.push_back(0xFFFD);
+ else
+ {
+ // Truncate the incomplete sequence to a single byte.
+ if (seen)
+ s = s.first(s.size() - seen);
+ s.back() = 0xFF;
+ }
+ ++errors;
+ }
+
+ if (errors == 0) [[likely]]
+ return true;
+ else if constexpr (!transcode)
+ {
+ out.reserve(s.size() + errors * 2);
+ for (unsigned char byte : s)
+ {
+ if (byte < 0xFE) [[likely]]
+ out += (char)byte;
+ else if (byte == 0xFF)
+ out += "\xef\xbf\xbd"; // U+FFFD in UTF-8
+ }
+ }
+ return false;
+ }
+
+ // Validate UTF-8 string.
+ // Returns true if s is valid UTF-8, otherwise returns false and stores
+ // a valid UTF-8 string in err.
+ [[__gnu__::__always_inline__]]
+ inline bool
+ to_valid_utf8(span<char> s, string& err)
+ {
+ return to_valid_unicode(s, err);
+ }
+
+ // Transcode UTF-8 string to UTF-16.
+ // Returns true if s is valid UTF-8, otherwise returns false.
+ // In either case, a valid UTF-16 string is stored in u16.
+ [[__gnu__::__always_inline__]]
+ inline bool
+ to_valid_utf16(span<char> s, u16string& u16)
+ {
+ return to_valid_unicode(s, u16);
+ }
+} // namespace
+
+ // Write a UTF-8 string to a file descriptor/handle.
+ // Ill-formed sequences in the string will be substituted with U+FFFD.
+ error_code
+ __write_to_terminal(void* term, span<char> str)
+ {
+ if (term == nullptr) [[unlikely]]
+ return std::make_error_code(std::errc::invalid_argument);
+
+ error_code ec;
+
+#ifdef _WIN32
+ // We could use std::wstring here instead of std::u16string. In general
+ // char_traits<wchar_t> is more optimized than char_traits<char16_t> but
+ // for the purposes of to_valid_unicode only char_traits::copy matters,
+ // and char_traits<char16_t>::copy uses memcpy so is OK.
+ u16string wstr;
+ if (!to_valid_utf16(str, wstr))
+ ec = std::make_error_code(errc::illegal_byte_sequence);
+
+ unsigned long nchars = 0;
+ WriteConsoleW(term, wstr.data(), wstr.size(), &nchars, nullptr);
+ if (nchars != wstr.size())
+ return {(int)GetLastError(), system_category()};
+#elifdef _GLIBCXX_HAVE_UNISTD_H
+ string out;
+ if (!to_valid_utf8(str, out))
+ {
+ str = out;
+ ec = std::make_error_code(errc::illegal_byte_sequence);
+ }
+
+ auto n = std::fwrite(str.data(), 1, str.size(), (FILE*)term);
+ if (n != str.size())
+ ec = std::make_error_code(errc::io_error);
+#else
+ ec = std::make_error_code(std::errc::function_not_supported);
+#endif
+ return ec;
+ }
+_GLIBCXX_END_NAMESPACE_VERSION
+} // namespace std