aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jonathan Wakely <jwakely@redhat.com> 2024-10-10 13:36:33 +0100
committer Jonathan Wakely <redi@gcc.gnu.org> 2024-10-14 10:38:46 +0100
commit d8ef4471cb9c9f86784b62424a215ea42173bfe1 (patch)
tree ea5749b2f6a0e8bf0a429fd782c0ebc12cdeb55d
parent 308d19c11e119b2c5abf67778dd0ac8a370e5df7 (diff)
downloadgcc-d8ef4471cb9c9f86784b62424a215ea42173bfe1.zip
gcc-d8ef4471cb9c9f86784b62424a215ea42173bfe1.tar.gz
gcc-d8ef4471cb9c9f86784b62424a215ea42173bfe1.tar.bz2
libstdc++: Enable memset optimizations for distinct character types [PR93059]
Currently we only optimize std::fill to memset when the source and destination types are the same byte-sized type. This means that we fail to optimize cases like std::fill(buf, buf+n, 0) because the literal 0 is not the same type as the character buffer. Such cases can safely be optimized to use memset, because assigning an int (or other integer) to a narrow character type has the same effects as converting the integer to unsigned char then copying it with memset. This patch enables the optimized code path when the fill character is a memcpy-able integer (using the new __memcpyable_integer trait). We still need to check is_same<U, T> to enable the memset optimization for filling a range of std::byte with a std::byte value, because that isn't a memcpyable integer. libstdc++-v3/ChangeLog: PR libstdc++/93059 * include/bits/stl_algobase.h (__fill_a1(T*, T*, const T&)): Change template parameters and enable_if condition to allow the fill value to be an integer.
-rw-r--r-- libstdc++-v3/include/bits/stl_algobase.h 20
1 files changed, 12 insertions, 8 deletions
diff --git a/libstdc++-v3/include/bits/stl_algobase.h b/libstdc++-v3/include/bits/stl_algobase.h
index 9e92211..384e5fd 100644
--- a/libstdc++-v3/include/bits/stl_algobase.h
+++ b/libstdc++-v3/include/bits/stl_algobase.h
@@ -967,23 +967,27 @@ _GLIBCXX_END_NAMESPACE_CONTAINER
#pragma GCC diagnostic pop
// Specialization: for char types we can use memset.
- template<typename _Tp>
+ template<typename _Up, typename _Tp>
_GLIBCXX20_CONSTEXPR
inline typename
- __gnu_cxx::__enable_if<__is_byte<_Tp>::__value, void>::__type
- __fill_a1(_Tp* __first, _Tp* __last, const _Tp& __c)
- {
- const _Tp __tmp = __c;
+ __gnu_cxx::__enable_if<__is_byte<_Up>::__value
+ && (__are_same<_Up, _Tp>::__value // for std::byte
+ || __memcpyable_integer<_Tp>::__value),
+ void>::__type
+ __fill_a1(_Up* __first, _Up* __last, const _Tp& __x)
+ {
+ // This hoists the load out of the loop and also ensures that we don't
+ // use memset for cases where the assignment would be ill-formed.
+ const _Up __val = __x;
#if __cpp_lib_is_constant_evaluated
if (std::is_constant_evaluated())
{
for (; __first != __last; ++__first)
- *__first = __tmp;
- return;
+ *__first = __val;
}
#endif
if (const size_t __len = __last - __first)
- __builtin_memset(__first, static_cast<unsigned char>(__tmp), __len);
+ __builtin_memset(__first, static_cast<unsigned char>(__val), __len);
}
template<typename _Ite, typename _Cont, typename _Tp>