diff options
author | Tomasz Kamiński <tkaminsk@redhat.com> | 2025-04-03 10:23:45 +0200 |
---|---|---|
committer | Tomasz Kamiński <tkaminsk@redhat.com> | 2025-04-03 13:46:20 +0200 |
commit | 5c7f6272f43f4265dc08eac4ee91164672c1c441 (patch) | |
tree | a74b38b01cd8c0f5d674706477367f327036bd3a | |
parent | 70bf0ee44017e8e26bb1bdcb6a3fd114c25c39c7 (diff) | |
download | gcc-5c7f6272f43f4265dc08eac4ee91164672c1c441.zip gcc-5c7f6272f43f4265dc08eac4ee91164672c1c441.tar.gz gcc-5c7f6272f43f4265dc08eac4ee91164672c1c441.tar.bz2 |
libstdc++: Fix handling of field width for wide strings and characters [PR119593]
This patch corrects handling of UTF-32LE and UTF-32BE in
__unicode::__literal_encoding_is_unicode<_CharT>, so they are
recognized as Unicode and the function produces the correct result for wchar_t.
Use `__unicode::__field_width` to compute the estimated width
of the character for Unicode wide encodings.
PR libstdc++/119593
libstdc++-v3/ChangeLog:
* include/bits/unicode.h
(__unicode::__literal_encoding_is_unicode<_CharT>):
Corrected handling for UTF-16 and UTF-32 with "LE" or "BE" suffix.
* include/std/format (__formatter_str::_S_character_width):
Define.
(__formatter_str::_M_format_character): Updated passed char
length.
* testsuite/std/format/functions/format.cc: Test for wchar_t.
Reviewed-by: Jonathan Wakely <jwakely@redhat.com>
Signed-off-by: Tomasz Kamiński <tkaminsk@redhat.com>
-rw-r--r-- | libstdc++-v3/include/bits/unicode.h | 2 | ||||
-rw-r--r-- | libstdc++-v3/include/std/format | 16 | ||||
-rw-r--r-- | libstdc++-v3/testsuite/std/format/functions/format.cc | 8 |
3 files changed, 23 insertions, 3 deletions
diff --git a/libstdc++-v3/include/bits/unicode.h b/libstdc++-v3/include/bits/unicode.h index 24b1ac3..99d972e 100644 --- a/libstdc++-v3/include/bits/unicode.h +++ b/libstdc++-v3/include/bits/unicode.h @@ -1039,6 +1039,8 @@ inline namespace __v16_0_0 string_view __s(__enc); if (__s.ends_with("//")) __s.remove_suffix(2); + if (__s.ends_with("LE") || __s.ends_with("BE")) + __s.remove_suffix(2); return __s == "16" || __s == "32"; } } diff --git a/libstdc++-v3/include/std/format b/libstdc++-v3/include/std/format index c3327e1..9ef719e 100644 --- a/libstdc++-v3/include/std/format +++ b/libstdc++-v3/include/std/format @@ -1277,12 +1277,26 @@ namespace __format _M_spec); } + [[__gnu__::__always_inline__]] + static size_t + _S_character_width(_CharT __c) + { + // N.B. single byte cannot encode charcter of width greater than 1 + if constexpr (sizeof(_CharT) > 1u && + __unicode::__literal_encoding_is_unicode<_CharT>()) + return __unicode::__field_width(__c); + else + return 1u; + } + template<typename _Out> typename basic_format_context<_Out, _CharT>::iterator _M_format_character(_CharT __c, basic_format_context<_Out, _CharT>& __fc) const { - return __format::__write_padded_as_spec({&__c, 1u}, 1, __fc, _M_spec); + return __format::__write_padded_as_spec({&__c, 1u}, + _S_character_width(__c), + __fc, _M_spec); } template<typename _Int> diff --git a/libstdc++-v3/testsuite/std/format/functions/format.cc b/libstdc++-v3/testsuite/std/format/functions/format.cc index 7fc4201..d8dbf46 100644 --- a/libstdc++-v3/testsuite/std/format/functions/format.cc +++ b/libstdc++-v3/testsuite/std/format/functions/format.cc @@ -501,9 +501,14 @@ test_unicode() { // Similar to sC example in test_std_examples, but not from the standard. // Verify that the character "🤡" has estimated field width 2, - // rather than estimated field width equal to strlen("🤡"), which would be 4. + // rather than estimated field width equal to strlen("🤡"), which would be 4, + // or just width 1 for single character. 
std::string sC = std::format("{:*<3}", "🤡"); VERIFY( sC == "🤡*" ); + std::wstring wsC = std::format(L"{:*<3}", L"🤡"); + VERIFY( wsC == L"🤡*" ); + wsC = std::format(L"{:*<3}", L'🤡'); + VERIFY( wsC == L"🤡*" ); // Verify that "£" has estimated field width 1, not strlen("£") == 2. std::string sL = std::format("{:*<3}", "£"); @@ -517,7 +522,6 @@ test_unicode() std::string sP = std::format("{:1.1} {:*<1.1}", "£", "🤡"); VERIFY( sP == "£ *" ); sP = std::format("{:*<2.1} {:*<2.1}", "£", "🤡"); - VERIFY( sP == "£* **" ); // Verify field width handling for extended grapheme clusters, // and that a cluster gets output as a single item, not truncated. |