diff options
-rw-r--r-- | libcxx/docs/ReleaseNotes/21.rst | 3 | ||||
-rw-r--r-- | libcxx/include/__algorithm/equal.h | 156 | ||||
-rw-r--r-- | libcxx/include/__bit_reference | 161 | ||||
-rw-r--r-- | libcxx/include/__vector/comparison.h | 14 | ||||
-rw-r--r-- | libcxx/include/bitset | 1 | ||||
-rw-r--r-- | libcxx/test/benchmarks/algorithms/equal.bench.cpp | 51 | ||||
-rw-r--r-- | libcxx/test/std/algorithms/alg.nonmodifying/alg.equal/equal.pass.cpp | 35 | ||||
-rw-r--r-- | libcxx/test/std/algorithms/alg.nonmodifying/alg.equal/ranges.equal.pass.cpp | 251 |
8 files changed, 451 insertions, 221 deletions
diff --git a/libcxx/docs/ReleaseNotes/21.rst b/libcxx/docs/ReleaseNotes/21.rst index 30c6cc5..e7cfa62 100644 --- a/libcxx/docs/ReleaseNotes/21.rst +++ b/libcxx/docs/ReleaseNotes/21.rst @@ -47,6 +47,9 @@ Improvements and New Features - The ``std::ranges::{copy, copy_n, copy_backward, move, move_backward}`` algorithms have been optimized for ``std::vector<bool>::iterator``, resulting in a performance improvement of up to 2000x. +- The ``std::ranges::equal`` algorithm has been optimized for ``std::vector<bool>::iterator``, resulting in a performance + improvement of up to 188x. + - Updated formatting library to Unicode 16.0.0. Deprecations and Removals diff --git a/libcxx/include/__algorithm/equal.h b/libcxx/include/__algorithm/equal.h index a276bb9..4cac965 100644 --- a/libcxx/include/__algorithm/equal.h +++ b/libcxx/include/__algorithm/equal.h @@ -11,16 +11,20 @@ #define _LIBCPP___ALGORITHM_EQUAL_H #include <__algorithm/comp.h> +#include <__algorithm/min.h> #include <__algorithm/unwrap_iter.h> #include <__config> #include <__functional/identity.h> +#include <__fwd/bit_reference.h> #include <__iterator/distance.h> #include <__iterator/iterator_traits.h> +#include <__memory/pointer_traits.h> #include <__string/constexpr_c_functions.h> #include <__type_traits/desugars_to.h> #include <__type_traits/enable_if.h> #include <__type_traits/invoke.h> #include <__type_traits/is_equality_comparable.h> +#include <__type_traits/is_same.h> #include <__type_traits/is_volatile.h> #include <__utility/move.h> @@ -33,6 +37,136 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD +template <class _Cp, bool _IsConst1, bool _IsConst2> +[[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool +__equal_unaligned(__bit_iterator<_Cp, _IsConst1> __first1, + __bit_iterator<_Cp, _IsConst1> __last1, + __bit_iterator<_Cp, _IsConst2> __first2) { + using _It = __bit_iterator<_Cp, _IsConst1>; + using difference_type = typename _It::difference_type; + using __storage_type = typename _It::__storage_type; + + const int __bits_per_word = _It::__bits_per_word; + difference_type __n = __last1 - __first1; + if (__n > 0) { + // do first word + if (__first1.__ctz_ != 0) { + unsigned __clz_f = __bits_per_word - __first1.__ctz_; + difference_type __dn = std::min(static_cast<difference_type>(__clz_f), __n); + __n -= __dn; + __storage_type __m = (~__storage_type(0) << __first1.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); + __storage_type __b = *__first1.__seg_ & __m; + unsigned __clz_r = __bits_per_word - __first2.__ctz_; + __storage_type __ddn = std::min<__storage_type>(__dn, __clz_r); + __m = (~__storage_type(0) << __first2.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn)); + if (__first2.__ctz_ > __first1.__ctz_) { + if ((*__first2.__seg_ & __m) != (__b << (__first2.__ctz_ - __first1.__ctz_))) + return false; + } else { + if ((*__first2.__seg_ & __m) != (__b >> (__first1.__ctz_ - __first2.__ctz_))) + return false; + } + __first2.__seg_ += (__ddn + __first2.__ctz_) / __bits_per_word; + __first2.__ctz_ = static_cast<unsigned>((__ddn + __first2.__ctz_) % __bits_per_word); + __dn -= __ddn; + if (__dn > 0) { + __m = ~__storage_type(0) >> (__bits_per_word - __dn); + if ((*__first2.__seg_ & __m) != (__b >> (__first1.__ctz_ + __ddn))) + return false; + __first2.__ctz_ = static_cast<unsigned>(__dn); + } + ++__first1.__seg_; + // __first1.__ctz_ = 0; + } + // __first1.__ctz_ == 0; + // do middle words + unsigned __clz_r = __bits_per_word - __first2.__ctz_; + __storage_type __m = ~__storage_type(0) << __first2.__ctz_; + for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first1.__seg_) { + __storage_type __b = *__first1.__seg_; + if ((*__first2.__seg_ & __m) != (__b << __first2.__ctz_)) + return false; + ++__first2.__seg_; + if ((*__first2.__seg_ & ~__m) != (__b >> __clz_r)) + return false; + } + // do last word + if (__n > 0) { + __m = ~__storage_type(0) >> (__bits_per_word - __n); + __storage_type __b = *__first1.__seg_ & __m; + __storage_type __dn = std::min(__n, static_cast<difference_type>(__clz_r)); + __m = (~__storage_type(0) << __first2.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn)); + if ((*__first2.__seg_ & __m) != (__b << __first2.__ctz_)) + return false; + __first2.__seg_ += (__dn + __first2.__ctz_) / __bits_per_word; + __first2.__ctz_ = static_cast<unsigned>((__dn + __first2.__ctz_) % __bits_per_word); + __n -= __dn; + if (__n > 0) { + __m = ~__storage_type(0) >> (__bits_per_word - __n); + if ((*__first2.__seg_ & __m) != (__b >> __dn)) + return false; + } + } + } + return true; +} + +template <class _Cp, bool _IsConst1, bool _IsConst2> +[[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool +__equal_aligned(__bit_iterator<_Cp, _IsConst1> __first1, + __bit_iterator<_Cp, _IsConst1> __last1, + __bit_iterator<_Cp, _IsConst2> __first2) { + using _It = __bit_iterator<_Cp, _IsConst1>; + using difference_type = typename _It::difference_type; + using __storage_type = typename _It::__storage_type; + + const int __bits_per_word = _It::__bits_per_word; + difference_type __n = __last1 - __first1; + if (__n > 0) { + // do first word + if (__first1.__ctz_ != 0) { + unsigned __clz = __bits_per_word - __first1.__ctz_; + difference_type __dn = std::min(static_cast<difference_type>(__clz), __n); + __n -= __dn; + __storage_type __m = (~__storage_type(0) << __first1.__ctz_) & (~__storage_type(0) >> (__clz - __dn)); + if ((*__first2.__seg_ & __m) != (*__first1.__seg_ & __m)) + return false; + ++__first2.__seg_; + ++__first1.__seg_; + // __first1.__ctz_ = 0; + // __first2.__ctz_ = 0; + } + // __first1.__ctz_ == 0; + // __first2.__ctz_ == 0; + // do middle words + for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first1.__seg_, ++__first2.__seg_) + if (*__first2.__seg_ != *__first1.__seg_) + return false; + // do last word + if (__n > 0) { + __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); + if ((*__first2.__seg_ & __m) != (*__first1.__seg_ & __m)) + return false; + } + } + return true; +} + +template <class _Cp, + bool _IsConst1, + bool _IsConst2, + class _BinaryPredicate, + __enable_if_t<__desugars_to_v<__equal_tag, _BinaryPredicate, bool, bool>, int> = 0> +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __equal_iter_impl( + __bit_iterator<_Cp, _IsConst1> __first1, + __bit_iterator<_Cp, _IsConst1> __last1, + __bit_iterator<_Cp, _IsConst2> __first2, + _BinaryPredicate) { + if (__first1.__ctz_ == __first2.__ctz_) + return std::__equal_aligned(__first1, __last1, __first2); + return std::__equal_unaligned(__first1, __last1, __first2); +} + template <class _InputIterator1, class _InputIterator2, class _BinaryPredicate> [[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __equal_iter_impl( _InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _BinaryPredicate& __pred) { @@ -94,6 +228,28 @@ __equal_impl(_Tp* __first1, _Tp* __last1, _Up* __first2, _Up*, _Pred&, _Proj1&, return std::__constexpr_memcmp_equal(__first1, __first2, __element_count(__last1 - __first1)); } +template <class _Cp, + bool _IsConst1, + bool _IsConst2, + class _Pred, + class _Proj1, + class _Proj2, + __enable_if_t<__desugars_to_v<__equal_tag, _Pred, bool, bool> && __is_identity<_Proj1>::value && + __is_identity<_Proj2>::value, + int> = 0> +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __equal_impl( + __bit_iterator<_Cp, _IsConst1> __first1, + __bit_iterator<_Cp, _IsConst1> __last1, + __bit_iterator<_Cp, _IsConst2> __first2, + __bit_iterator<_Cp, _IsConst2>, + _Pred&, + _Proj1&, + _Proj2&) { + if (__first1.__ctz_ == __first2.__ctz_) + return std::__equal_aligned(__first1, __last1, __first2); + return std::__equal_unaligned(__first1, __last1, __first2); +} + template <class _InputIterator1, class _InputIterator2, class _BinaryPredicate> [[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool equal(_InputIterator1 __first1, diff --git a/libcxx/include/__bit_reference b/libcxx/include/__bit_reference index 377f5fed..079d1de 100644 --- a/libcxx/include/__bit_reference +++ b/libcxx/include/__bit_reference @@ -10,9 +10,11 @@ #ifndef _LIBCPP___BIT_REFERENCE #define _LIBCPP___BIT_REFERENCE +#include <__algorithm/comp.h> #include <__algorithm/copy.h> #include <__algorithm/copy_backward.h> #include <__algorithm/copy_n.h> +#include <__algorithm/equal.h> #include <__algorithm/min.h> #include <__assert> #include <__bit/countr.h> @@ -20,13 +22,16 @@ #include <__config> #include <__cstddef/ptrdiff_t.h> #include <__cstddef/size_t.h> +#include <__functional/identity.h> #include <__fwd/bit_reference.h> #include <__iterator/iterator_traits.h> #include <__memory/construct_at.h> #include <__memory/pointer_traits.h> #include <__type_traits/conditional.h> +#include <__type_traits/desugars_to.h> #include <__type_traits/enable_if.h> #include <__type_traits/is_constant_evaluated.h> +#include <__type_traits/is_same.h> #include <__type_traits/is_unsigned.h> #include <__type_traits/void_t.h> #include <__utility/pair.h> @@ -428,127 +433,6 @@ rotate(__bit_iterator<_Cp, false> __first, __bit_iterator<_Cp, false> __middle, return __r; } -// equal - -template <class _Cp, bool _IC1, bool _IC2> -_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool __equal_unaligned( - __bit_iterator<_Cp, _IC1> __first1, __bit_iterator<_Cp, _IC1> __last1, __bit_iterator<_Cp, _IC2> __first2) { - using _It = __bit_iterator<_Cp, _IC1>; - using difference_type = typename _It::difference_type; - using __storage_type = typename _It::__storage_type; - - const int __bits_per_word = _It::__bits_per_word; - difference_type __n = __last1 - __first1; - if (__n > 0) { - // do first word - if (__first1.__ctz_ != 0) { - unsigned __clz_f = __bits_per_word - __first1.__ctz_; - difference_type __dn = std::min(static_cast<difference_type>(__clz_f), __n); - __n -= __dn; - __storage_type __m = (~__storage_type(0) << __first1.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); - __storage_type __b = *__first1.__seg_ & __m; - unsigned __clz_r = __bits_per_word - __first2.__ctz_; - __storage_type __ddn = std::min<__storage_type>(__dn, __clz_r); - __m = (~__storage_type(0) << __first2.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn)); - if (__first2.__ctz_ > __first1.__ctz_) { - if ((*__first2.__seg_ & __m) != (__b << (__first2.__ctz_ - __first1.__ctz_))) - return false; - } else { - if ((*__first2.__seg_ & __m) != (__b >> (__first1.__ctz_ - __first2.__ctz_))) - return false; - } - __first2.__seg_ += (__ddn + __first2.__ctz_) / __bits_per_word; - __first2.__ctz_ = static_cast<unsigned>((__ddn + __first2.__ctz_) % __bits_per_word); - __dn -= __ddn; - if (__dn > 0) { - __m = ~__storage_type(0) >> (__bits_per_word - __dn); - if ((*__first2.__seg_ & __m) != (__b >> (__first1.__ctz_ + __ddn))) - return false; - __first2.__ctz_ = static_cast<unsigned>(__dn); - } - ++__first1.__seg_; - // __first1.__ctz_ = 0; - } - // __first1.__ctz_ == 0; - // do middle words - unsigned __clz_r = __bits_per_word - __first2.__ctz_; - __storage_type __m = ~__storage_type(0) << __first2.__ctz_; - for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first1.__seg_) { - __storage_type __b = *__first1.__seg_; - if ((*__first2.__seg_ & __m) != (__b << __first2.__ctz_)) - return false; - ++__first2.__seg_; - if ((*__first2.__seg_ & ~__m) != (__b >> __clz_r)) - return false; - } - // do last word - if (__n > 0) { - __m = ~__storage_type(0) >> (__bits_per_word - __n); - __storage_type __b = *__first1.__seg_ & __m; - __storage_type __dn = std::min(__n, static_cast<difference_type>(__clz_r)); - __m = (~__storage_type(0) << __first2.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn)); - if ((*__first2.__seg_ & __m) != (__b << __first2.__ctz_)) - return false; - __first2.__seg_ += (__dn + __first2.__ctz_) / __bits_per_word; - __first2.__ctz_ = static_cast<unsigned>((__dn + __first2.__ctz_) % __bits_per_word); - __n -= __dn; - if (__n > 0) { - __m = ~__storage_type(0) >> (__bits_per_word - __n); - if ((*__first2.__seg_ & __m) != (__b >> __dn)) - return false; - } - } - } - return true; -} - -template <class _Cp, bool _IC1, bool _IC2> -_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool __equal_aligned( - __bit_iterator<_Cp, _IC1> __first1, __bit_iterator<_Cp, _IC1> __last1, __bit_iterator<_Cp, _IC2> __first2) { - using _It = __bit_iterator<_Cp, _IC1>; - using difference_type = typename _It::difference_type; - using __storage_type = typename _It::__storage_type; - - const int __bits_per_word = _It::__bits_per_word; - difference_type __n = __last1 - __first1; - if (__n > 0) { - // do first word - if (__first1.__ctz_ != 0) { - unsigned __clz = __bits_per_word - __first1.__ctz_; - difference_type __dn = std::min(static_cast<difference_type>(__clz), __n); - __n -= __dn; - __storage_type __m = (~__storage_type(0) << __first1.__ctz_) & (~__storage_type(0) >> (__clz - __dn)); - if ((*__first2.__seg_ & __m) != (*__first1.__seg_ & __m)) - return false; - ++__first2.__seg_; - ++__first1.__seg_; - // __first1.__ctz_ = 0; - // __first2.__ctz_ = 0; - } - // __first1.__ctz_ == 0; - // __first2.__ctz_ == 0; - // do middle words - for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first1.__seg_, ++__first2.__seg_) - if (*__first2.__seg_ != *__first1.__seg_) - return false; - // do last word - if (__n > 0) { - __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); - if ((*__first2.__seg_ & __m) != (*__first1.__seg_ & __m)) - return false; - } - } - return true; -} - -template <class _Cp, bool _IC1, bool _IC2> -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool -equal(__bit_iterator<_Cp, _IC1> __first1, __bit_iterator<_Cp, _IC1> __last1, __bit_iterator<_Cp, _IC2> __first2) { - if (__first1.__ctz_ == __first2.__ctz_) - return std::__equal_aligned(__first1, __last1, __first2); - return std::__equal_unaligned(__first1, __last1, __first2); -} - template <class _Cp, bool _IsConst, typename _Cp::__storage_type> class __bit_iterator { public: @@ -771,15 +655,36 @@ private: template <class _Dp> _LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, false> rotate(__bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>); - template <class _Dp, bool _IC1, bool _IC2> - _LIBCPP_CONSTEXPR_SINCE_CXX20 friend bool - __equal_aligned(__bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC2>); - template <class _Dp, bool _IC1, bool _IC2> + template <class _Dp, bool _IsConst1, bool _IsConst2> _LIBCPP_CONSTEXPR_SINCE_CXX20 friend bool - __equal_unaligned(__bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC2>); - template <class _Dp, bool _IC1, bool _IC2> + __equal_aligned(__bit_iterator<_Dp, _IsConst1>, __bit_iterator<_Dp, _IsConst1>, __bit_iterator<_Dp, _IsConst2>); + template <class _Dp, bool _IsConst1, bool _IsConst2> _LIBCPP_CONSTEXPR_SINCE_CXX20 friend bool - equal(__bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC2>); + __equal_unaligned(__bit_iterator<_Dp, _IsConst1>, __bit_iterator<_Dp, _IsConst1>, __bit_iterator<_Dp, _IsConst2>); + template <class _Dp, + bool _IsConst1, + bool _IsConst2, + class _BinaryPredicate, + __enable_if_t<__desugars_to_v<__equal_tag, _BinaryPredicate, bool, bool>, int> > + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 friend bool __equal_iter_impl( + __bit_iterator<_Dp, _IsConst1>, __bit_iterator<_Dp, _IsConst1>, __bit_iterator<_Dp, _IsConst2>, _BinaryPredicate); + template <class _Dp, + bool _IsConst1, + bool _IsConst2, + class _Pred, + class _Proj1, + class _Proj2, + __enable_if_t<__desugars_to_v<__equal_tag, _Pred, bool, bool> && __is_identity<_Proj1>::value && + __is_identity<_Proj2>::value, + int> > + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 friend bool __equal_impl( + __bit_iterator<_Dp, _IsConst1> __first1, + __bit_iterator<_Dp, _IsConst1> __last1, + __bit_iterator<_Dp, _IsConst2> __first2, + __bit_iterator<_Dp, _IsConst2>, + _Pred&, + _Proj1&, + _Proj2&); template <bool _ToFind, class _Dp, bool _IC> _LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, _IC> __find_bool(__bit_iterator<_Dp, _IC>, typename __size_difference_type_traits<_Dp>::size_type); diff --git a/libcxx/include/__vector/comparison.h b/libcxx/include/__vector/comparison.h index 27178e4..960663e 100644 --- a/libcxx/include/__vector/comparison.h +++ b/libcxx/include/__vector/comparison.h @@ -29,6 +29,20 @@ operator==(const vector<_Tp, _Allocator>& __x, const vector<_Tp, _Allocator>& __ return __sz == __y.size() && std::equal(__x.begin(), __x.end(), __y.begin()); } +// FIXME: Remove this `vector<bool>` overload once #126369 is resolved, reverting to the generic `operator==` +// with `std::equal` for better performance. +template <class _Allocator> +_LIBCPP_CONSTEXPR_SINCE_CXX20 inline _LIBCPP_HIDE_FROM_ABI bool +operator==(const vector<bool, _Allocator>& __x, const vector<bool, _Allocator>& __y) { + const typename vector<bool, _Allocator>::size_type __sz = __x.size(); + if (__sz != __y.size()) + return false; + for (typename vector<bool, _Allocator>::size_type __i = 0; __i < __sz; ++__i) + if (__x[__i] != __y[__i]) + return false; + return true; +} + #if _LIBCPP_STD_VER <= 17 template <class _Tp, class _Allocator> diff --git a/libcxx/include/bitset b/libcxx/include/bitset index d9acc32..f905b6f 100644 --- a/libcxx/include/bitset +++ b/libcxx/include/bitset @@ -132,6 +132,7 @@ template <size_t N> struct hash<std::bitset<N>>; # include <__algorithm/copy.h> # include <__algorithm/copy_backward.h> # include <__algorithm/count.h> +# include <__algorithm/equal.h> # include <__algorithm/fill.h> # include <__algorithm/fill_n.h> # include <__algorithm/find.h> diff --git a/libcxx/test/benchmarks/algorithms/equal.bench.cpp b/libcxx/test/benchmarks/algorithms/equal.bench.cpp index 2dc1158..328b396 100644 --- a/libcxx/test/benchmarks/algorithms/equal.bench.cpp +++ b/libcxx/test/benchmarks/algorithms/equal.bench.cpp @@ -45,4 +45,55 @@ static void bm_ranges_equal(benchmark::State& state) { } BENCHMARK(bm_ranges_equal)->DenseRange(1, 8)->Range(16, 1 << 20); +static void bm_ranges_equal_vb_aligned(benchmark::State& state) { + auto n = state.range(); + std::vector<bool> vec1(n, true); + std::vector<bool> vec2(n, true); + for (auto _ : state) { + benchmark::DoNotOptimize(std::ranges::equal(vec1, vec2)); + benchmark::DoNotOptimize(&vec1); + benchmark::DoNotOptimize(&vec2); + } +} + +static void bm_ranges_equal_vb_unaligned(benchmark::State& state) { + auto n = state.range(); + std::vector<bool> vec1(n, true); + std::vector<bool> vec2(n + 8, true); + auto beg1 = std::ranges::begin(vec1); + auto end1 = std::ranges::end(vec1); + auto beg2 = std::ranges::begin(vec2) + 4; + auto end2 = std::ranges::end(vec2) - 4; + for (auto _ : state) { + benchmark::DoNotOptimize(std::ranges::equal(beg1, end1, beg2, end2)); + benchmark::DoNotOptimize(&vec1); + benchmark::DoNotOptimize(&vec2); + } +} + +// Test std::ranges::equal for vector<bool>::iterator +BENCHMARK(bm_ranges_equal_vb_aligned)->RangeMultiplier(4)->Range(8, 1 << 20); +BENCHMARK(bm_ranges_equal_vb_unaligned)->Range(8, 1 << 20); + +static void bm_equal_vb(benchmark::State& state, bool aligned) { + auto n = state.range(); + std::vector<bool> vec1(n, true); + std::vector<bool> vec2(aligned ? n : n + 8, true); + auto beg1 = vec1.begin(); + auto end1 = vec1.end(); + auto beg2 = aligned ? vec2.begin() : vec2.begin() + 4; + for (auto _ : state) { + benchmark::DoNotOptimize(std::equal(beg1, end1, beg2)); + benchmark::DoNotOptimize(&vec1); + benchmark::DoNotOptimize(&vec2); + } +} + +static void bm_equal_vb_aligned(benchmark::State& state) { bm_equal_vb(state, true); } +static void bm_equal_vb_unaligned(benchmark::State& state) { bm_equal_vb(state, false); } + +// Test std::equal for vector<bool>::iterator +BENCHMARK(bm_equal_vb_aligned)->Range(8, 1 << 20); +BENCHMARK(bm_equal_vb_unaligned)->Range(8, 1 << 20); + BENCHMARK_MAIN(); diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.equal/equal.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.equal/equal.pass.cpp index c3ba3f8..b37d788 100644 --- a/libcxx/test/std/algorithms/alg.nonmodifying/alg.equal/equal.pass.cpp +++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.equal/equal.pass.cpp @@ -28,6 +28,7 @@ #include <algorithm> #include <cassert> #include <functional> +#include <vector> #include "test_iterators.h" #include "test_macros.h" @@ -123,6 +124,30 @@ private: #endif +template <std::size_t N> +TEST_CONSTEXPR_CXX20 void test_vector_bool() { + std::vector<bool> in(N, false); + for (std::size_t i = 0; i < N; i += 2) + in[i] = true; + + { // Test equal() with aligned bytes + std::vector<bool> out = in; + assert(std::equal(in.begin(), in.end(), out.begin())); +#if TEST_STD_VER >= 14 + assert(std::equal(in.begin(), in.end(), out.begin(), out.end())); +#endif + } + + { // Test equal() with unaligned bytes + std::vector<bool> out(N + 8); + std::copy(in.begin(), in.end(), out.begin() + 4); + assert(std::equal(in.begin(), in.end(), out.begin() + 4)); +#if TEST_STD_VER >= 14 + assert(std::equal(in.begin(), in.end(), out.begin() + 4, out.end() - 4)); +#endif + } +} + TEST_CONSTEXPR_CXX20 bool test() { types::for_each(types::cpp17_input_iterator_list<int*>(), TestIter2<int, types::cpp17_input_iterator_list<int*> >()); types::for_each( @@ -138,6 +163,16 @@ TEST_CONSTEXPR_CXX20 bool test() { TestIter2<trivially_equality_comparable, types::cpp17_input_iterator_list<trivially_equality_comparable*>>{}); #endif + { // Test vector<bool>::iterator optimization + test_vector_bool<8>(); + test_vector_bool<19>(); + test_vector_bool<32>(); + test_vector_bool<49>(); + test_vector_bool<64>(); + test_vector_bool<199>(); + test_vector_bool<256>(); + } + return true; } diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.equal/ranges.equal.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.equal/ranges.equal.pass.cpp index f36cd2e..4c1c29c 100644 --- a/libcxx/test/std/algorithms/alg.nonmodifying/alg.equal/ranges.equal.pass.cpp +++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.equal/ranges.equal.pass.cpp @@ -28,13 +28,17 @@ #include <concepts> #include <functional> #include <ranges> +#include <vector> #include "almost_satisfies_types.h" #include "test_iterators.h" +#include "test_macros.h" -template <class Iter1, class Sent1 = sentinel_wrapper<Iter1>, - class Iter2 = Iter1, class Sent2 = sentinel_wrapper<Iter2>> -concept HasEqualIt = requires (Iter1 first1, Sent1 last1, Iter2 first2, Sent2 last2) { +template <class Iter1, + class Sent1 = sentinel_wrapper<Iter1>, + class Iter2 = Iter1, + class Sent2 = sentinel_wrapper<Iter2>> +concept HasEqualIt = requires(Iter1 first1, Sent1 last1, Iter2 first2, Sent2 last2) { std::ranges::equal(first1, last1, first2, last2); }; @@ -52,9 +56,7 @@ static_assert(!HasEqualIt<int*, int*, int*, SentinelForNotWeaklyEqualityComparab static_assert(!HasEqualIt<int*, int*, int**>); template <class Range1, class Range2> -concept HasEqualR = requires (Range1 range1, Range2 range2) { - std::ranges::equal(range1, range2); -}; +concept HasEqualR = requires(Range1 range1, Range2 range2) { std::ranges::equal(range1, range2); }; static_assert(HasEqualR<UncheckedRange<int*>, UncheckedRange<int*>>); static_assert(!HasEqualR<InputRangeNotDerivedFrom, UncheckedRange<int*>>); @@ -75,15 +77,15 @@ constexpr void test_iterators() { { int a[] = {1, 2, 3, 4}; int b[] = {1, 2, 3, 4}; - std::same_as<bool> decltype(auto) ret = std::ranges::equal(Iter1(a), Sent1(Iter1(a + 4)), - Iter2(b), Sent2(Iter2(b + 4))); + std::same_as<bool> decltype(auto) ret = + std::ranges::equal(Iter1(a), Sent1(Iter1(a + 4)), Iter2(b), Sent2(Iter2(b + 4))); assert(ret); } { - int a[] = {1, 2, 3, 4}; - int b[] = {1, 2, 3, 4}; - auto range1 = std::ranges::subrange(Iter1(a), Sent1(Iter1(a + 4))); - auto range2 = std::ranges::subrange(Iter2(b), Sent2(Iter2(b + 4))); + int a[] = {1, 2, 3, 4}; + int b[] = {1, 2, 3, 4}; + auto range1 = std::ranges::subrange(Iter1(a), Sent1(Iter1(a + 4))); + auto range2 = std::ranges::subrange(Iter2(b), Sent2(Iter2(b + 4))); std::same_as<bool> decltype(auto) ret = std::ranges::equal(range1, range2); assert(ret); } @@ -92,12 +94,12 @@ constexpr void test_iterators() { { // check that false is returned for non-equal ranges { int a[] = {1, 2, 3, 4}; - int b[] = {1, 2, 4, 4}; + int b[] = {1, 2, 4, 4}; assert(!std::ranges::equal(Iter1(a), Sent1(Iter1(a + 4)), Iter2(b), Sent2(Iter2(b + 4)))); } { - int a[] = {1, 2, 3, 4}; - int b[] = {1, 2, 4, 4}; + int a[] = {1, 2, 3, 4}; + int b[] = {1, 2, 4, 4}; auto range1 = std::ranges::subrange(Iter1(a), Sent1(Iter1(a + 4))); auto range2 = std::ranges::subrange(Iter2(b), Sent2(Iter2(b + 4))); assert(!std::ranges::equal(range1, range2)); @@ -106,95 +108,96 @@ constexpr void test_iterators() { { // check that the predicate is used (return true) { - int a[] = {1, 2, 3, 4}; - int b[] = {2, 3, 4, 5}; - auto ret = std::ranges::equal(Iter1(a), Sent1(Iter1(a + 4)), Iter2(b), Sent2(Iter2(b + 4)), - [](int l, int r) { return l != r; }); + int a[] = {1, 2, 3, 4}; + int b[] = {2, 3, 4, 5}; + auto ret = std::ranges::equal(Iter1(a), Sent1(Iter1(a + 4)), Iter2(b), Sent2(Iter2(b + 4)), [](int l, int r) { + return l != r; + }); assert(ret); } { - int a[] = {1, 2, 3, 4}; - int b[] = {2, 3, 4, 5}; + int a[] = {1, 2, 3, 4}; + int b[] = {2, 3, 4, 5}; auto range1 = std::ranges::subrange(Iter1(a), Sent1(Iter1(a + 4))); auto range2 = std::ranges::subrange(Iter2(b), Sent2(Iter2(b + 4))); - auto ret = std::ranges::equal(range1, range2, [](int l, int r) { return l != r; }); + auto ret = std::ranges::equal(range1, range2, [](int l, int r) { return l != r; }); assert(ret); } } { // check that the predicate is used (return false) { - int a[] = {1, 2, 3, 4}; - int b[] = {2, 3, 3, 5}; - auto ret = std::ranges::equal(Iter1(a), Sent1(Iter1(a + 4)), Iter2(b), Sent2(Iter2(b + 4)), - [](int l, int r) { return l != r; }); + int a[] = {1, 2, 3, 4}; + int b[] = {2, 3, 3, 5}; + auto ret = std::ranges::equal(Iter1(a), Sent1(Iter1(a + 4)), Iter2(b), Sent2(Iter2(b + 4)), [](int l, int r) { + return l != r; + }); assert(!ret); } { - int a[] = {1, 2, 3, 4}; - int b[] = {2, 3, 3, 5}; + int a[] = {1, 2, 3, 4}; + int b[] = {2, 3, 3, 5}; auto range1 = std::ranges::subrange(Iter1(a), Sent1(Iter1(a + 4))); auto range2 = std::ranges::subrange(Iter2(b), Sent2(Iter2(b + 4))); - auto ret = std::ranges::equal(range1, range2, [](int l, int r) { return l != r; }); + auto ret = std::ranges::equal(range1, range2, [](int l, int r) { return l != r; }); assert(!ret); } } { // check that the projections are used { - int a[] = {1, 2, 3, 4, 5}; - int b[] = {6, 10, 14, 18, 22}; - auto ret = std::ranges::equal(Iter1(a), Sent1(Iter1(a + 5)), - Iter2(b), Sent2(Iter2(b + 5)), - {}, - [](int i) { return i * 4; }, - [](int i) { return i - 2; }); + int a[] = {1, 2, 3, 4, 5}; + int b[] = {6, 10, 14, 18, 22}; + auto ret = std::ranges::equal( + Iter1(a), + Sent1(Iter1(a + 5)), + Iter2(b), + Sent2(Iter2(b + 5)), + {}, + [](int i) { return i * 4; }, + [](int i) { return i - 2; }); assert(ret); } { - int a[] = {1, 2, 3, 4, 5}; - int b[] = {6, 10, 14, 18, 22}; + int a[] = {1, 2, 3, 4, 5}; + int b[] = {6, 10, 14, 18, 22}; auto range1 = std::ranges::subrange(Iter1(a), Sent1(Iter1(a + 5))); auto range2 = std::ranges::subrange(Iter2(b), Sent2(Iter2(b + 5))); - auto ret = std::ranges::equal(range1, - range2, - {}, - [](int i) { return i * 4; }, - [](int i) { return i - 2; }); + auto ret = std::ranges::equal(range1, range2, {}, [](int i) { return i * 4; }, [](int i) { return i - 2; }); assert(ret); } } { // check that different sized ranges work { - int a[] = {4, 3, 2, 1}; - int b[] = {4, 3, 2, 1, 5, 6, 7}; + int a[] = {4, 3, 2, 1}; + int b[] = {4, 3, 2, 1, 5, 6, 7}; auto ret = std::ranges::equal(Iter1(a), Sent1(Iter1(a + 4)), Iter2(b), Sent2(Iter2(b + 7))); assert(!ret); } { - int a[] = {4, 3, 2, 1}; - int b[] = {4, 3, 2, 1, 5, 6, 7}; + int a[] = {4, 3, 2, 1}; + int b[] = {4, 3, 2, 1, 5, 6, 7}; auto range1 = std::ranges::subrange(Iter1(a), Sent1(Iter1(a + 4))); auto range2 = std::ranges::subrange(Iter2(b), Sent2(Iter2(b + 7))); - auto ret = std::ranges::equal(range1, range2); + auto ret = std::ranges::equal(range1, range2); assert(!ret); } } { // check that two ranges with the same size but different values are different { - int a[] = {4, 6, 34, 76, 5}; - int b[] = {4, 6, 34, 67, 5}; + int a[] = {4, 6, 34, 76, 5}; + int b[] = {4, 6, 34, 67, 5}; auto ret = std::ranges::equal(Iter1(a), Sent1(Iter1(a + 5)), Iter2(b), Sent2(Iter2(b + 5))); assert(!ret); } { - int a[] = {4, 6, 34, 76, 5}; - int b[] = {4, 6, 34, 67, 5}; + int a[] = {4, 6, 34, 76, 5}; + int b[] = {4, 6, 34, 67, 5}; auto range1 = std::ranges::subrange(Iter1(a), Sent1(Iter1(a + 5))); auto range2 = std::ranges::subrange(Iter2(b), Sent2(Iter2(b + 5))); - auto ret = std::ranges::equal(range1, range2); + auto ret = std::ranges::equal(range1, range2); assert(!ret); } } @@ -211,7 +214,7 @@ constexpr void test_iterators() { std::array<int, 0> b = {}; auto range1 = std::ranges::subrange(Iter1(a.data()), Sent1(Iter1(a.data()))); auto range2 = std::ranges::subrange(Iter2(b.data()), Sent2(Iter2(b.data()))); - auto ret = std::ranges::equal(range1, range2); + auto ret = std::ranges::equal(range1, range2); assert(ret); } } @@ -219,39 +222,39 @@ constexpr void test_iterators() { { // check that it works with the first range empty { std::array<int, 0> a = {}; - int b[] = {1, 2}; + int b[] = {1, 2}; auto ret = std::ranges::equal(Iter1(a.data()), Sent1(Iter1(a.data())), Iter2(b), Sent2(Iter2(b + 2))); assert(!ret); } { std::array<int, 0> a = {}; - int b[] = {1, 2}; + int b[] = {1, 2}; auto range1 = std::ranges::subrange(Iter1(a.data()), Sent1(Iter1(a.data()))); - auto range2 = std::ranges::subrange(Iter2(b), Sent2(Iter2(b + 2))); - auto ret = std::ranges::equal(range1, range2); + auto range2 = std::ranges::subrange(Iter2(b), Sent2(Iter2(b + 2))); + auto ret = std::ranges::equal(range1, range2); assert(!ret); } } { // check that it works with the second range empty { - int a[] = {1, 2}; + int a[] = {1, 2}; std::array<int, 0> b = {}; auto ret = std::ranges::equal(Iter1(a), Sent1(Iter1(a + 2)), Iter2(b.data()), Sent2(Iter2(b.data()))); assert(!ret); } { - int a[] = {1, 2}; + int a[] = {1, 2}; std::array<int, 0> b = {}; - auto range1 = std::ranges::subrange(Iter1(a), Sent1(Iter1(a + 2))); + auto range1 = std::ranges::subrange(Iter1(a), Sent1(Iter1(a + 2))); auto range2 = std::ranges::subrange(Iter2(b.data()), Sent2(Iter2(b.data()))); - auto ret = std::ranges::equal(range1, range2); + auto ret = std::ranges::equal(range1, range2); assert(!ret); } } } -template<class Iter1, class Sent1 = Iter1> +template <class Iter1, class Sent1 = Iter1> constexpr void test_iterators2() { test_iterators<Iter1, Sent1, cpp17_input_iterator<int*>, sentinel_wrapper<cpp17_input_iterator<int*>>>(); test_iterators<Iter1, Sent1, cpp20_input_iterator<int*>, sentinel_wrapper<cpp20_input_iterator<int*>>>(); @@ -263,6 +266,22 @@ constexpr void test_iterators2() { test_iterators<Iter1, Sent1, const int*>(); } +template <std::size_t N> +constexpr void test_vector_bool() { + std::vector<bool> in(N, false); + for (std::size_t i = 0; i < N; i += 2) + in[i] = true; + { // Test equal() with aligned bytes + std::vector<bool> out = in; + assert(std::ranges::equal(in, out)); + } + { // Test equal() with unaligned bytes + std::vector<bool> out(N + 8); + std::copy(in.begin(), in.end(), out.begin() + 4); + assert(std::ranges::equal(in.begin(), in.end(), out.begin() + 4, out.end() - 4)); + } +} + constexpr bool test() { test_iterators2<cpp17_input_iterator<int*>, sentinel_wrapper<cpp17_input_iterator<int*>>>(); test_iterators2<cpp20_input_iterator<int*>, sentinel_wrapper<cpp20_input_iterator<int*>>>(); @@ -281,40 +300,52 @@ constexpr bool test() { int i; }; { - S a[] = {7, 8, 9}; - S b[] = {7, 8, 9}; + S a[] = {7, 8, 9}; + S b[] = {7, 8, 9}; auto ret = std::ranges::equal(a, a + 3, b, b + 3, &S::equal, &S::identity, &S::i); assert(ret); } { - S a[] = {7, 8, 9}; - S b[] = {7, 8, 9}; + S a[] = {7, 8, 9}; + S b[] = {7, 8, 9}; auto ret = std::ranges::equal(a, b, &S::equal, &S::identity, &S::i); assert(ret); } } - { // check that the complexity requirements are met + { // check that the complexity requirements are met { // different size { - int a[] = {1, 2, 3}; - int b[] = {1, 2, 3, 4}; + int a[] = {1, 2, 3}; + int b[] = {1, 2, 3, 4}; int predCount = 0; int projCount = 0; - auto pred = [&](int l, int r) { ++predCount; return l == r; }; - auto proj = [&](int i) { ++projCount; return i; }; + auto pred = [&](int l, int r) { + ++predCount; + return l == r; + }; + auto proj = [&](int i) { + ++projCount; + return i; + }; auto ret = std::ranges::equal(a, a + 3, b, b + 4, pred, proj, proj); assert(!ret); assert(predCount == 0); assert(projCount == 0); } { - int a[] = {1, 2, 3}; - int b[] = {1, 2, 3, 4}; + int a[] = {1, 2, 3}; + int b[] = {1, 2, 3, 4}; int predCount = 0; int projCount = 0; - auto pred = [&](int l, int r) { ++predCount; return l == r; }; - auto proj = [&](int i) { ++projCount; return i; }; + auto pred = [&](int l, int r) { + ++predCount; + return l == r; + }; + auto proj = [&](int i) { + ++projCount; + return i; + }; auto ret = std::ranges::equal(a, b, pred, proj, proj); assert(!ret); assert(predCount == 0); @@ -324,27 +355,39 @@ constexpr bool test() { { // not a sized sentinel { - int a[] = {1, 2, 3}; - int b[] = {1, 2, 3, 4}; + int a[] = {1, 2, 3}; + int b[] = {1, 2, 3, 4}; int predCount = 0; int projCount = 0; - auto pred = [&](int l, int r) { ++predCount; return l == r; }; - auto proj = [&](int i) { ++projCount; return i; }; + auto pred = [&](int l, int r) { + ++predCount; + return l == r; + }; + auto proj = [&](int i) { + ++projCount; + return i; + }; auto ret = std::ranges::equal(a, sentinel_wrapper(a + 3), b, sentinel_wrapper(b + 4), pred, proj, proj); assert(!ret); assert(predCount <= 4); assert(projCount <= 7); } { - int a[] = {1, 2, 3}; - int b[] = {1, 2, 3, 4}; + int a[] = {1, 2, 3}; + int b[] = {1, 2, 3, 4}; int predCount = 0; int projCount = 0; - auto pred = [&](int l, int r) { ++predCount; return l == r; }; - auto proj = [&](int i) { ++projCount; return i; }; + auto pred = [&](int l, int r) { + ++predCount; + return l == r; + }; + auto proj = [&](int i) { + ++projCount; + return i; + }; auto range1 = std::ranges::subrange(a, sentinel_wrapper(a + 3)); auto range2 = std::ranges::subrange(b, sentinel_wrapper(b + 4)); - auto ret = std::ranges::equal(range1, range2, pred, proj, proj); + auto ret = std::ranges::equal(range1, range2, pred, proj, proj); assert(!ret); assert(predCount <= 4); assert(projCount <= 7); @@ -353,30 +396,52 @@ constexpr bool test() { { // same size { - int a[] = {1, 2, 3}; - int b[] = {1, 2, 3}; + int a[] = {1, 2, 3}; + int b[] = {1, 2, 3}; int predCount = 0; int projCount = 0; - auto pred = [&](int l, int r) { ++predCount; return l == r; }; - auto proj = [&](int i) { ++projCount; return i; }; + auto pred = [&](int l, int r) { + ++predCount; + return l == r; + }; + auto proj = [&](int i) { + ++projCount; + return i; + }; auto ret = std::ranges::equal(a, a + 3, b, b + 3, pred, proj, proj); assert(ret); assert(predCount == 3); assert(projCount == 6); } { - int a[] = {1, 2, 3}; - int b[] = {1, 2, 3}; + int a[] = {1, 2, 3}; + int b[] = {1, 2, 3}; int predCount = 0; int projCount = 0; - auto pred = [&](int l, int r) { ++predCount; return l == r; }; - auto proj = [&](int i) { ++projCount; return i; }; + auto pred = [&](int l, int r) { + ++predCount; + return l == r; + }; + auto proj = [&](int i) { + ++projCount; + return i; + }; auto ret = std::ranges::equal(a, b, pred, proj, proj); assert(ret); assert(predCount == 3); assert(projCount == 6); } } + + { // Test vector<bool>::iterator optimization + test_vector_bool<8>(); + test_vector_bool<19>(); + test_vector_bool<32>(); + test_vector_bool<49>(); + test_vector_bool<64>(); + test_vector_bool<199>(); + test_vector_bool<256>(); + } } return true; |