Diffstat (limited to 'libcxx')
-rw-r--r-- | libcxx/docs/ReleaseNotes/22.rst | 5
-rw-r--r-- | libcxx/include/__algorithm/for_each.h | 6
-rw-r--r-- | libcxx/include/__algorithm/generate_n.h | 13
-rw-r--r-- | libcxx/include/__cxx03/__algorithm/for_each.h | 6
-rw-r--r-- | libcxx/include/__hash_table | 100
-rw-r--r-- | libcxx/test/std/algorithms/alg.modifying.operations/alg.generate/generate_n.pass.cpp | 11
-rw-r--r-- | libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_range.pass.cpp | 31
-rw-r--r-- | libcxx/test/std/containers/unord/unord.multiset/erase_range.pass.cpp | 17
-rwxr-xr-x | libcxx/utils/compare-benchmarks | 2
-rwxr-xr-x | libcxx/utils/visualize-historical | 12
10 files changed, 154 insertions, 49 deletions
diff --git a/libcxx/docs/ReleaseNotes/22.rst b/libcxx/docs/ReleaseNotes/22.rst
index ada8b41..25d33a9 100644
--- a/libcxx/docs/ReleaseNotes/22.rst
+++ b/libcxx/docs/ReleaseNotes/22.rst
@@ -62,6 +62,7 @@ Improvements and New Features
   has been improved by up to 3x
 - The performance of ``insert(iterator, iterator)`` of ``map``, ``set``, ``multimap`` and ``multiset`` has been
   improved by up to 2.5x
+- The performance of ``erase(iterator, iterator)`` in the unordered containers has been improved by up to 1.9x
 - The performance of ``map::insert_or_assign`` has been improved by up to 2x
 - ``ofstream::write`` has been optimized to pass through large strings to system calls directly instead of copying
   them in chunks into a buffer.
@@ -75,8 +76,8 @@ Improvements and New Features
 - The ``std::{fill, fill_n}`` and ``std::ranges::{fill, fill_n}`` algorithms have been optimized for segmented
   iterators, resulting in a performance improvement of at least 10x for ``std::deque<int>`` iterators and
   ``std::join_view<std::vector<std::vector<int>>>`` iterators.
-- The ``std::generate`` algorithm has been optimized for segmented iterators, resulting in a performance improvement for
-  ``std::deque<short>`` and ``std::join_view<vector<vector<short>>>`` iterators.
+- The ``std::generate`` and ``std::generate_n`` algorithms have been optimized for segmented iterators, resulting in a
+  performance improvement for ``std::deque<short>`` and ``std::join_view<vector<vector<short>>>`` iterators.
 
 Deprecations and Removals
 -------------------------
diff --git a/libcxx/include/__algorithm/for_each.h b/libcxx/include/__algorithm/for_each.h
index e31fcae..6fb66d2 100644
--- a/libcxx/include/__algorithm/for_each.h
+++ b/libcxx/include/__algorithm/for_each.h
@@ -16,15 +16,11 @@
 #include <__iterator/segmented_iterator.h>
 #include <__type_traits/enable_if.h>
 #include <__type_traits/invoke.h>
-#include <__utility/move.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
 #endif
 
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
 _LIBCPP_BEGIN_NAMESPACE_STD
 
 template <class _InputIterator, class _Sent, class _Func, class _Proj>
@@ -60,6 +56,4 @@ for_each(_InputIterator __first, _InputIterator __last, _Func __f) {
 
 _LIBCPP_END_NAMESPACE_STD
 
-_LIBCPP_POP_MACROS
-
 #endif // _LIBCPP___ALGORITHM_FOR_EACH_H
diff --git a/libcxx/include/__algorithm/generate_n.h b/libcxx/include/__algorithm/generate_n.h
index f36403f..e9da133 100644
--- a/libcxx/include/__algorithm/generate_n.h
+++ b/libcxx/include/__algorithm/generate_n.h
@@ -9,8 +9,10 @@
 #ifndef _LIBCPP___ALGORITHM_GENERATE_N_H
 #define _LIBCPP___ALGORITHM_GENERATE_N_H
 
+#include <__algorithm/for_each_n.h>
 #include <__config>
-#include <__utility/convert_to_integral.h>
+#include <__functional/identity.h>
+#include <__utility/forward.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
@@ -21,11 +23,10 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 template <class _OutputIterator, class _Size, class _Generator>
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator
 generate_n(_OutputIterator __first, _Size __orig_n, _Generator __gen) {
-  typedef decltype(std::__convert_to_integral(__orig_n)) _IntegralSize;
-  _IntegralSize __n = __orig_n;
-  for (; __n > 0; ++__first, (void)--__n)
-    *__first = __gen();
-  return __first;
+  using __iter_ref = decltype(*__first);
+  __identity __proj;
+  auto __f = [&](__iter_ref __element) { std::forward<__iter_ref>(__element) = __gen(); };
+  return std::__for_each_n(__first, __orig_n, __f, __proj);
 }
 
 _LIBCPP_END_NAMESPACE_STD
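The rewritten generate_n delegates to the internal __for_each_n, which already dispatches to a segmented-iterator fast path; that is where the deque and join_view improvement in the release notes comes from. A rough standalone sketch of the same delegation idea, written against the public std::for_each_n rather than the libc++ internals (my_generate_n is a hypothetical name, and the sketch assumes at least a forward iterator rather than a bare output iterator):

#include <algorithm>
#include <cassert>
#include <deque>

// Hypothetical helper: express generate_n in terms of for_each_n so that any
// iterator-specific optimization in for_each_n (e.g. for segmented iterators)
// is reused automatically. std::for_each_n returns the iterator past the last
// element it visited, which is exactly what generate_n has to return.
template <class ForwardIt, class Size, class Generator>
ForwardIt my_generate_n(ForwardIt first, Size n, Generator gen) {
  return std::for_each_n(first, n, [&](auto& element) { element = gen(); });
}

int main() {
  std::deque<int> d(2048); // deque iterators are segmented
  int i = 0;
  my_generate_n(d.begin(), d.size(), [&] { return i++; });
  assert(d.front() == 0 && d.back() == 2047);
}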
diff --git a/libcxx/include/__cxx03/__algorithm/for_each.h b/libcxx/include/__cxx03/__algorithm/for_each.h
index d160a9e..1ffb013 100644
--- a/libcxx/include/__cxx03/__algorithm/for_each.h
+++ b/libcxx/include/__cxx03/__algorithm/for_each.h
@@ -14,15 +14,11 @@
 #include <__cxx03/__config>
 #include <__cxx03/__iterator/segmented_iterator.h>
 #include <__cxx03/__type_traits/enable_if.h>
-#include <__cxx03/__utility/move.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
 #endif
 
-_LIBCPP_PUSH_MACROS
-#include <__cxx03/__undef_macros>
-
 _LIBCPP_BEGIN_NAMESPACE_STD
 
 template <class _InputIterator, class _Function>
@@ -34,6 +30,4 @@ _LIBCPP_HIDE_FROM_ABI _Function for_each(_InputIterator __first, _InputIterator
 
 _LIBCPP_END_NAMESPACE_STD
 
-_LIBCPP_POP_MACROS
-
 #endif // _LIBCPP___CXX03___ALGORITHM_FOR_EACH_H
diff --git a/libcxx/include/__hash_table b/libcxx/include/__hash_table
index 6b65e73..5432abb 100644
--- a/libcxx/include/__hash_table
+++ b/libcxx/include/__hash_table
@@ -1037,7 +1037,21 @@ private:
   }
   _LIBCPP_HIDE_FROM_ABI void __move_assign_alloc(__hash_table&, false_type) _NOEXCEPT {}
 
-  _LIBCPP_HIDE_FROM_ABI void __deallocate_node(__next_pointer __np) _NOEXCEPT;
+  _LIBCPP_HIDE_FROM_ABI void __deallocate_node(__node_pointer __nd) _NOEXCEPT {
+    auto& __alloc = __node_alloc();
+    __node_traits::destroy(__alloc, std::addressof(__nd->__get_value()));
+    std::__destroy_at(std::__to_address(__nd));
+    __node_traits::deallocate(__alloc, __nd, 1);
+  }
+
+  _LIBCPP_HIDE_FROM_ABI void __deallocate_node_list(__next_pointer __np) _NOEXCEPT {
+    while (__np != nullptr) {
+      __next_pointer __next = __np->__next_;
+      __deallocate_node(__np->__upcast());
+      __np = __next;
+    }
+  }
+
   _LIBCPP_HIDE_FROM_ABI __next_pointer __detach() _NOEXCEPT;
 
   template <class _From, class _ValueT = _Tp, __enable_if_t<__is_hash_value_type<_ValueT>::value, int> = 0>
@@ -1175,7 +1189,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::~__hash_table() {
   static_assert(is_copy_constructible<hasher>::value, "Hasher must be copy-constructible.");
 #endif
 
-  __deallocate_node(__first_node_.__next_);
+  __deallocate_node_list(__first_node_.__next_);
 }
 
 template <class _Tp, class _Hash, class _Equal, class _Alloc>
@@ -1251,7 +1265,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::operator=(const __hash_table& __other)
   // At this point we either have consumed the whole incoming hash table, or we don't have any more nodes to reuse in
   // the destination. Either continue with constructing new nodes, or deallocate the left over nodes.
   if (__own_iter->__next_) {
-    __deallocate_node(__own_iter->__next_);
+    __deallocate_node_list(__own_iter->__next_);
     __own_iter->__next_ = nullptr;
   } else {
     __copy_construct(__other_iter, __own_iter, __current_chash);
@@ -1263,19 +1277,6 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::operator=(const __hash_table& __other)
 }
 
 template <class _Tp, class _Hash, class _Equal, class _Alloc>
-void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__deallocate_node(__next_pointer __np) _NOEXCEPT {
-  __node_allocator& __na = __node_alloc();
-  while (__np != nullptr) {
-    __next_pointer __next = __np->__next_;
-    __node_pointer __real_np = __np->__upcast();
-    __node_traits::destroy(__na, std::addressof(__real_np->__get_value()));
-    std::__destroy_at(std::addressof(*__real_np));
-    __node_traits::deallocate(__na, __real_np, 1);
-    __np = __next;
-  }
-}
-
-template <class _Tp, class _Hash, class _Equal, class _Alloc>
 typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::__next_pointer
 __hash_table<_Tp, _Hash, _Equal, _Alloc>::__detach() _NOEXCEPT {
   size_type __bc = bucket_count();
@@ -1318,7 +1319,7 @@ void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__move_assign(__hash_table& __u,
   max_load_factor() = __u.max_load_factor();
   if (bucket_count() != 0) {
     __next_pointer __cache = __detach();
-    auto __guard = std::__make_scope_guard([&] { __deallocate_node(__cache); });
+    auto __guard = std::__make_scope_guard([&] { __deallocate_node_list(__cache); });
     const_iterator __i = __u.begin();
     while (__cache != nullptr && __u.size() != 0) {
       __assign_value(__cache->__upcast()->__get_value(), std::move(__u.remove(__i++)->__get_value()));
@@ -1353,7 +1354,7 @@ void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__assign_unique(_InputIterator __
 
   if (bucket_count() != 0) {
     __next_pointer __cache = __detach();
-    auto __guard = std::__make_scope_guard([&] { __deallocate_node(__cache); });
+    auto __guard = std::__make_scope_guard([&] { __deallocate_node_list(__cache); });
     for (; __cache != nullptr && __first != __last; ++__first) {
       __assign_value(__cache->__upcast()->__get_value(), *__first);
       __next_pointer __next = __cache->__next_;
@@ -1374,7 +1375,7 @@ void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__assign_multi(_InputIterator __f
                 "__assign_multi may only be called with the containers value type or the nodes value type");
   if (bucket_count() != 0) {
     __next_pointer __cache = __detach();
-    auto __guard = std::__make_scope_guard([&] { __deallocate_node(__cache); });
+    auto __guard = std::__make_scope_guard([&] { __deallocate_node_list(__cache); });
     for (; __cache != nullptr && __first != __last; ++__first) {
       __assign_value(__cache->__upcast()->__get_value(), *__first);
       __next_pointer __next = __cache->__next_;
@@ -1413,7 +1414,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::end() const _NOEXCEPT {
 
 template <class _Tp, class _Hash, class _Equal, class _Alloc>
 void __hash_table<_Tp, _Hash, _Equal, _Alloc>::clear() _NOEXCEPT {
   if (size() > 0) {
-    __deallocate_node(__first_node_.__next_);
+    __deallocate_node_list(__first_node_.__next_);
     __first_node_.__next_ = nullptr;
     size_type __bc = bucket_count();
     for (size_type __i = 0; __i < __bc; ++__i)
@@ -1873,12 +1874,61 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::erase(const_iterator __p) {
 
 template <class _Tp, class _Hash, class _Equal, class _Alloc>
 typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator
 __hash_table<_Tp, _Hash, _Equal, _Alloc>::erase(const_iterator __first, const_iterator __last) {
-  for (const_iterator __p = __first; __first != __last; __p = __first) {
-    ++__first;
-    erase(__p);
+  if (__first == __last)
+    return iterator(__last.__node_);
+
+  // current node
+  __next_pointer __current = __first.__node_;
+  size_type __bucket_count = bucket_count();
+  size_t __chash = std::__constrain_hash(__current->__hash(), __bucket_count);
+  // find previous node
+  __next_pointer __before_first = __bucket_list_[__chash];
+  for (; __before_first->__next_ != __current; __before_first = __before_first->__next_)
+    ;
+
+  __next_pointer __last_node = __last.__node_;
+
+  // If __before_first is in the same bucket (i.e. the first element we erase is not the first in the bucket), clear
+  // this bucket first without re-linking it
+  if (__before_first != __first_node_.__ptr() &&
+      std::__constrain_hash(__before_first->__hash(), __bucket_count) == __chash) {
+    while (__current != __last_node) {
+      auto __next = __current->__next_;
+      __deallocate_node(__current->__upcast());
+      __current = __next;
+      --__size_;
+
+      if (__next) {
+        if (auto __next_chash = std::__constrain_hash(__next->__hash(), __bucket_count); __next_chash != __chash) {
+          __bucket_list_[__next_chash] = __before_first;
+          __chash = __next_chash;
+          break;
+        }
+      }
+    }
+  }
+
+  while (__current != __last_node) {
+    auto __next = __current->__next_;
+    __deallocate_node(__current->__upcast());
+    __current = __next;
+    --__size_;
+
+    // When switching buckets, set the old bucket to be empty and update the next bucket to have __before_first as its
+    // before-first element
+    if (__next) {
+      if (auto __next_chash = std::__constrain_hash(__next->__hash(), __bucket_count); __next_chash != __chash) {
+        __bucket_list_[__chash] = nullptr;
+        __bucket_list_[__next_chash] = __before_first;
+        __chash = __next_chash;
+      }
+    }
   }
-  __next_pointer __np = __last.__node_;
-  return iterator(__np);
+
+  // re-link __before_first with __last
+  __before_first->__next_ = __current;
+
+  return iterator(__last.__node_);
 }
 
 template <class _Tp, class _Hash, class _Equal, class _Alloc>
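The new range erase walks the erased range exactly once, frees each node directly, and touches the bucket array only when the walk crosses a bucket boundary, whereas the old loop performed a full single-element erase per node. A crude way to observe the difference from user code is a timing sketch along the following lines; it is not part of the patch, the absolute numbers depend on compiler, standard library and hardware, and the 1.9x figure quoted in the release notes comes from libc++'s own benchmark suite.

#include <chrono>
#include <cstdio>
#include <unordered_set>

// Returns the microseconds elapsed since `start` (helper for this sketch only).
static long long elapsed_us(std::chrono::steady_clock::time_point start) {
  return std::chrono::duration_cast<std::chrono::microseconds>(
             std::chrono::steady_clock::now() - start)
      .count();
}

int main() {
  std::unordered_multiset<int> range_erased, loop_erased;
  for (int i = 0; i < 1'000'000; ++i) {
    range_erased.insert(i % 4096); // several equivalent keys per bucket
    loop_erased.insert(i % 4096);
  }

  auto start = std::chrono::steady_clock::now();
  range_erased.erase(range_erased.begin(), range_erased.end()); // the path changed by this patch
  std::printf("erase(first, last): %lld us\n", elapsed_us(start));

  start = std::chrono::steady_clock::now();
  for (auto it = loop_erased.begin(); it != loop_erased.end();)
    it = loop_erased.erase(it); // per-node erase, roughly what the old loop did
  std::printf("erase one-by-one:   %lld us\n", elapsed_us(start));
}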
diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.generate/generate_n.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.generate/generate_n.pass.cpp
index 13fd1cb..525737c 100644
--- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.generate/generate_n.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.generate/generate_n.pass.cpp
@@ -16,6 +16,7 @@
 
 #include <algorithm>
 #include <cassert>
+#include <deque>
 
 #include "test_iterators.h"
 #include "test_macros.h"
@@ -71,12 +72,22 @@ test()
     test2<Iter, long double>();
 }
 
+void deque_test() {
+  int sizes[] = {0, 1, 2, 1023, 1024, 1025, 2047, 2048, 2049};
+  for (const int size : sizes) {
+    std::deque<int> d(size);
+    std::generate_n(d.begin(), size, gen_test());
+    assert(std::all_of(d.begin(), d.end(), [](int x) { return x == 2; }));
+  }
+}
+
 int main(int, char**)
 {
     test<forward_iterator<int*> >();
     test<bidirectional_iterator<int*> >();
     test<random_access_iterator<int*> >();
     test<int*>();
+    deque_test();
 
 #if TEST_STD_VER > 17
     static_assert(test_constexpr());
diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_range.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_range.pass.cpp
index f8a2bdd..38b75c0 100644
--- a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_range.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_range.pass.cpp
@@ -91,6 +91,37 @@ int main(int, char**) {
     assert(c.size() == 0);
     assert(k == c.end());
   }
+  { // Make sure we're properly destroying the elements when erasing
+    { // When erasing part of a bucket
+      std::unordered_multimap<int, std::string> map;
+      map.insert(std::make_pair(1, "This is a long string to make sure ASan can detect a memory leak"));
+      map.insert(std::make_pair(1, "This is another long string to make sure ASan can detect a memory leak"));
+      map.erase(++map.begin(), map.end());
+    }
+    { // When erasing the whole bucket
+      std::unordered_multimap<int, std::string> map;
+      map.insert(std::make_pair(1, "This is a long string to make sure ASan can detect a memory leak"));
+      map.insert(std::make_pair(1, "This is another long string to make sure ASan can detect a memory leak"));
+      map.erase(map.begin(), map.end());
+    }
+  }
+  { // Make sure that we're properly updating the bucket list when starting within a bucket
+    struct MyHash {
+      size_t operator()(size_t v) const { return v; }
+    };
+    std::unordered_multimap<size_t, size_t, MyHash> map;
+    size_t collision_val = 2 + map.bucket_count(); // try to get a bucket collision
+    map.rehash(3);
+    map.insert(std::pair<size_t, size_t>(1, 1));
+    map.insert(std::pair<size_t, size_t>(collision_val, 1));
+    map.insert(std::pair<size_t, size_t>(2, 1));
+    LIBCPP_ASSERT(map.bucket(2) == map.bucket(collision_val));
+
+    auto erase = map.equal_range(2);
+    map.erase(erase.first, erase.second);
+    for (const auto& v : map)
+      assert(v.first == 1 || v.first == collision_val);
+  }
 #if TEST_STD_VER >= 11
   {
     typedef std::unordered_multimap<int,
diff --git a/libcxx/test/std/containers/unord/unord.multiset/erase_range.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/erase_range.pass.cpp
index 62724fd..3bc686e 100644
--- a/libcxx/test/std/containers/unord/unord.multiset/erase_range.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multiset/erase_range.pass.cpp
@@ -47,6 +47,23 @@ int main(int, char**) {
     assert(c.size() == 0);
     assert(k == c.end());
   }
+  { // Make sure that we're properly updating the bucket list when starting within a bucket
+    struct MyHash {
+      size_t operator()(size_t v) const { return v; }
+    };
+    std::unordered_multiset<size_t, MyHash> map;
+    size_t collision_val = 2 + map.bucket_count(); // try to get a bucket collision
+    map.rehash(3);
+    map.insert(1);
+    map.insert(collision_val);
+    map.insert(2);
+    LIBCPP_ASSERT(map.bucket(2) == map.bucket(collision_val));
+
+    auto erase = map.equal_range(2);
+    map.erase(erase.first, erase.second);
+    for (const auto& v : map)
+      assert(v == 1 || v == collision_val);
+  }
 #if TEST_STD_VER >= 11
   {
     typedef std::unordered_multiset<int, std::hash<int>, std::equal_to<int>, min_allocator<int>> C;
diff --git a/libcxx/utils/compare-benchmarks b/libcxx/utils/compare-benchmarks
index d165c73..b5bd880 100755
--- a/libcxx/utils/compare-benchmarks
+++ b/libcxx/utils/compare-benchmarks
@@ -72,7 +72,7 @@ def plain_text_comparison(data, metric, baseline_name=None, candidate_name=None)
     geomean_0 = statistics.geometric_mean(data[f'{metric}_0'].dropna())
     geomean_1 = statistics.geometric_mean(data[f'{metric}_1'].dropna())
     geomean_row = ['Geomean', geomean_0, geomean_1, (geomean_1 - geomean_0), (geomean_1 - geomean_0) / geomean_0]
-    table.loc[table.index.max()] = geomean_row
+    table.loc[table.index.max() + 1] = geomean_row
 
     return tabulate.tabulate(table.set_index('benchmark'), headers=headers, floatfmt=fmt, numalign='right')
 
diff --git a/libcxx/utils/visualize-historical b/libcxx/utils/visualize-historical
index 114c7e8..0a94b15 100755
--- a/libcxx/utils/visualize-historical
+++ b/libcxx/utils/visualize-historical
@@ -112,7 +112,7 @@ def truncate_lines(string, n, marker=None):
     assert len(truncated) <= n, "broken post-condition"
     return '\n'.join(truncated)
 
-def create_plot(data, metric, subtitle=None):
+def create_plot(data, metric, trendline=None, subtitle=None):
     """
     Create a plot object showing the evolution of each benchmark throughout the
     given commits for the given metric.
@@ -126,7 +126,7 @@ def create_plot(data, metric, subtitle=None):
                         symbol='benchmark',
                         color='benchmark',
                         hover_name=[hover_info[c] for c in data['commit']],
-                        trendline="lowess")
+                        trendline=trendline)
     return figure
 
 def directory_path(string):
@@ -221,6 +221,10 @@ def main(argv):
     parser.add_argument('--open', action='store_true',
                         help='Whether to automatically open the generated HTML file when finished. If no output file is provided, '
                              'the resulting benchmark is opened automatically by default.')
+    parser.add_argument('--trendline', type=str, required=False, default=None, choices=('ols', 'lowess', 'expanding'),
+                        help='Optional trendline to add on each series in the chart. See the documentation in '
+                             'https://plotly.com/python-api-reference/generated/plotly.express.trendline_functions.html '
+                             'details on each option.')
     args = parser.parse_args(argv)
 
     # Extract benchmark data from the directory.
@@ -250,9 +254,11 @@ def main(argv):
     if args.filter is not None:
         keeplist = [b for b in data['benchmark'] if re.search(args.filter, b) is not None]
         data = data[data['benchmark'].isin(keeplist)]
+        if len(data) == 0:
+            raise RuntimeError(f'Filter "{args.filter}" resulted in empty data set -- nothing to plot')
 
     # Plot the data for all the required benchmarks.
-    figure = create_plot(data, args.metric, subtitle=args.subtitle)
+    figure = create_plot(data, args.metric, trendline=args.trendline, subtitle=args.subtitle)
     do_open = args.output is None or args.open
     output = args.output if args.output is not None else tempfile.NamedTemporaryFile(suffix='.html').name
     plotly.io.write_html(figure, file=output, auto_open=do_open)