aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohannes Singler <singler@ira.uka.de>2008-04-07 08:27:34 +0000
committerJohannes Singler <singler@gcc.gnu.org>2008-04-07 08:27:34 +0000
commitf9985df51b32cb265f429d024e955e71fd23a4f1 (patch)
treee2e44f1d16ae320c57bc274b9ec59d0eb8838d7e
parent1d4cd3d0b5e866a6b0ce1bea8dbf0526f794dffe (diff)
downloadgcc-f9985df51b32cb265f429d024e955e71fd23a4f1.zip
gcc-f9985df51b32cb265f429d024e955e71fd23a4f1.tar.gz
gcc-f9985df51b32cb265f429d024e955e71fd23a4f1.tar.bz2
re PR libstdc++/35588 ([parallel mode] parallel std::sort and bind())
2008-04-07 Johannes Singler <singler@ira.uka.de> * include/parallel/multiway_merge.h: Moved decisions to compiletime instead of runtime. * include/parallel/losertree.h: Removed obsolete variants, added variant that uses pointers in the loser tree. * include/parallel/types.h: Remove obsolete settings options from enum. * include/parallel/features.h: Remove obsolete compile-time switches. * include/parallel/compiletime_settings.h: Remove obsolete variant that copies back *after* sorting. * include/parallel/tags.h: Add one new tag for compile-time switch. * include/parallel/merge.h: Adapt to changes in multiway_merge.h. * include/parallel/multiway_mergesort.h: Adapt to changes in multiway_merge.h. Factor out splitting variants. Remove obsolete variant that copies back *after* sorting. * include/parallel/sort.h: Adapt to changes in multiway_mergesort.h. * testsuite/25_algorithms/sort/35588.cc: Added test case from / for PR 35588. From-SVN: r133975
-rw-r--r--libstdc++-v3/ChangeLog26
-rw-r--r--libstdc++-v3/include/parallel/compiletime_settings.h10
-rw-r--r--libstdc++-v3/include/parallel/features.h60
-rw-r--r--libstdc++-v3/include/parallel/losertree.h1883
-rw-r--r--libstdc++-v3/include/parallel/merge.h17
-rw-r--r--libstdc++-v3/include/parallel/multiway_merge.h2319
-rw-r--r--libstdc++-v3/include/parallel/multiway_mergesort.h420
-rw-r--r--libstdc++-v3/include/parallel/sort.h44
-rw-r--r--libstdc++-v3/include/parallel/tags.h3
-rw-r--r--libstdc++-v3/include/parallel/types.h11
-rw-r--r--libstdc++-v3/testsuite/25_algorithms/sort/35588.cc32
11 files changed, 2487 insertions, 2338 deletions
diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog
index f590074..e452aaa 100644
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,3 +1,29 @@
+2008-04-07 Johannes Singler <singler@ira.uka.de>
+
+ * include/parallel/multiway_merge.h:
+ Moved decisions to compiletime instead of runtime.
+ * include/parallel/losertree.h:
+ Removed obsolete variants, added variant that uses pointers
+ in the loser tree.
+ * include/parallel/types.h:
+ Remove obsolete settings options from enum.
+ * include/parallel/features.h:
+ Remove obsolete compile-time switches.
+ * include/parallel/compiletime_settings.h:
+ Remove obsolete variant that copies back *after* sorting.
+ * include/parallel/tags.h:
+ Add one new tag for compile-time switch.
+ * include/parallel/merge.h:
+ Adapt to changes in multiway_merge.h.
+ * include/parallel/multiway_mergesort.h:
+ Adapt to changes in multiway_merge.h.
+ Factor out splitting variants.
+ Remove obsolete variant that copies back *after* sorting.
+ * include/parallel/sort.h:
+ Adapt to changes in multiway_mergesort.h.
+ * testsuite/25_algorithms/sort/35588.cc:
+ Added test case from / for PR 35588.
+
2008-03-29 Paolo Carlini <pcarlini@suse.de>
PR libstdc++/35725
diff --git a/libstdc++-v3/include/parallel/compiletime_settings.h b/libstdc++-v3/include/parallel/compiletime_settings.h
index edaea38..8ab89aa 100644
--- a/libstdc++-v3/include/parallel/compiletime_settings.h
+++ b/libstdc++-v3/include/parallel/compiletime_settings.h
@@ -73,17 +73,9 @@
* __gnu_parallel::parallel_random_shuffle(). */
#define _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 0
#endif
-#ifndef _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
+#ifndef _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
/** @brief Switch on many _GLIBCXX_PARALLEL_ASSERTions in parallel code.
* Consider the size of the TLB for
* __gnu_parallel::parallel_random_shuffle(). */
#define _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB 0
#endif
-
-#ifndef _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST
-/** @brief First copy the data, sort it locally, and merge it back
- * (0); or copy it back after everything is done (1).
- *
- * Recommendation: 0 */
-#define _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST 0
-#endif
diff --git a/libstdc++-v3/include/parallel/features.h b/libstdc++-v3/include/parallel/features.h
index 2e09980..7150c20 100644
--- a/libstdc++-v3/include/parallel/features.h
+++ b/libstdc++-v3/include/parallel/features.h
@@ -61,66 +61,6 @@
#define _GLIBCXX_BAL_QUICKSORT 1
#endif
-#ifndef _GLIBCXX_LOSER_TREE
-/** @def _GLIBCXX_LOSER_TREE
- * @brief Include guarded (sequences may run empty) loser tree,
- * moving objects.
- * @see __gnu_parallel::_Settings multiway_merge_algorithm */
-#define _GLIBCXX_LOSER_TREE 1
-#endif
-
-#ifndef _GLIBCXX_LOSER_TREE_EXPLICIT
-/** @def _GLIBCXX_LOSER_TREE_EXPLICIT
- * @brief Include standard loser tree, storing two flags for infimum
- * and supremum.
- * @see __gnu_parallel::_Settings multiway_merge_algorithm */
-#define _GLIBCXX_LOSER_TREE_EXPLICIT 0
-#endif
-
-#ifndef _GLIBCXX_LOSER_TREE_REFERENCE
-/** @def _GLIBCXX_LOSER_TREE_REFERENCE
- * @brief Include some loser tree variant.
- * @see __gnu_parallel::_Settings multiway_merge_algorithm */
-#define _GLIBCXX_LOSER_TREE_REFERENCE 0
-#endif
-
-#ifndef _GLIBCXX_LOSER_TREE_POINTER
-/** @def _GLIBCXX_LOSER_TREE_POINTER
- * @brief Include some loser tree variant.
- * @see __gnu_parallel::_Settings multiway_merge_algorithm */
-#define _GLIBCXX_LOSER_TREE_POINTER 1
-#endif
-
-#ifndef _GLIBCXX_LOSER_TREE_UNGUARDED
-/** @def _GLIBCXX_LOSER_TREE_UNGUARDED
- * @brief Include unguarded (sequences must not run empty) loser
- * tree, moving objects.
- * @see __gnu_parallel::_Settings multiway_merge_algorithm */
-#define _GLIBCXX_LOSER_TREE_UNGUARDED 0
-#endif
-
-#ifndef _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED
-/** @def _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED
- * @brief Include some loser tree variant.
- * @see __gnu_parallel::_Settings multiway_merge_algorithm */
-#define _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED 1
-#endif
-
-#ifndef _GLIBCXX_LOSER_TREE_COMBINED
-/** @def _GLIBCXX_LOSER_TREE_COMBINED
- * @brief Include some loser tree variant.
- * @see __gnu_parallel::_Settings multiway_merge_algorithm */
-#define _GLIBCXX_LOSER_TREE_COMBINED 0
-#endif
-
-#ifndef _GLIBCXX_LOSER_TREE_SENTINEL
-/** @def _GLIBCXX_LOSER_TREE_SENTINEL
- * @brief Include some loser tree variant.
- * @see __gnu_parallel::_Settings multiway_merge_algorithm */
-#define _GLIBCXX_LOSER_TREE_SENTINEL 0
-#endif
-
-
#ifndef _GLIBCXX_FIND_GROWING_BLOCKS
/** @brief Include the growing blocks variant for std::find.
* @see __gnu_parallel::_Settings::find_algorithm */
diff --git a/libstdc++-v3/include/parallel/losertree.h b/libstdc++-v3/include/parallel/losertree.h
index ddeb0d3..cae15c0 100644
--- a/libstdc++-v3/include/parallel/losertree.h
+++ b/libstdc++-v3/include/parallel/losertree.h
@@ -47,878 +47,725 @@
namespace __gnu_parallel
{
-#if _GLIBCXX_LOSER_TREE_EXPLICIT
-
-/** @brief Guarded loser tree, copying the whole element into the
-* tree structure.
-*
-* Guarding is done explicitly through two flags per element, inf
-* and sup This is a quite slow variant.
-*/
-template<typename T, typename Comparator = std::less<T> >
- class LoserTreeExplicit
+/**
+ * @brief Guarded loser/tournament tree.
+ *
+ * The smallest element is at the top.
+ *
+ * Guarding is done explicitly through one flag sup per element,
+ * inf is not needed due to a better initialization routine. This
+ * is a well-performing variant.
+ *
+ * @param T the element type
+ * @param Comparator the comparator to use, defaults to std::less<T>
+ */
+template<typename T, typename Comparator>
+class LoserTreeBase
+{
+protected:
+ /** @brief Internal representation of a LoserTree element. */
+ struct Loser
{
- private:
- struct Loser
- {
- // The relevant element.
- T key;
-
- // Is this an infimum or supremum element?
- bool inf, sup;
-
- // Number of the sequence the element comes from.
- int source;
- };
-
- unsigned int size, offset;
- Loser* losers;
- Comparator comp;
-
- public:
- LoserTreeExplicit(unsigned int _size, Comparator _comp = std::less<T>())
- : comp(_comp)
- {
- size = _size;
- offset = size;
- losers = new Loser[size];
- for (unsigned int l = 0; l < size; ++l)
- {
- //losers[l].key = ... stays unset
- losers[l].inf = true;
- losers[l].sup = false;
- //losers[l].source = -1; //sentinel
- }
- }
-
- ~LoserTreeExplicit()
- { delete[] losers; }
+ /** @brief flag, true iff this is a "maximum" sentinel. */
+ bool sup;
+ /** @brief index of the source sequence. */
+ int source;
+ /** @brief key of the element in the LoserTree. */
+ T key;
+ };
- int
- get_min_source()
- { return losers[0].source; }
+ unsigned int ik, k, offset;
+
+ /** log_2{k} */
+ unsigned int _M_log_k;
+
+ /** @brief LoserTree elements. */
+ Loser* losers;
+
+ /** @brief Comparator to use. */
+ Comparator comp;
+
+ /**
+ * @brief State flag that determines whether the LoserTree is empty.
+ *
+ * Only used for building the LoserTree.
+ */
+ bool first_insert;
+
+public:
+ /**
+ * @brief The constructor.
+ *
+ * @param _k The number of sequences to merge.
+ * @param _comp The comparator to use.
+ */
+ LoserTreeBase(unsigned int _k, Comparator _comp)
+ : comp(_comp)
+ {
+ ik = _k;
+
+ // Compute log_2{k} for the Loser Tree
+ _M_log_k = log2(ik - 1) + 1;
+
+ // Next greater power of 2.
+ k = 1 << _M_log_k;
+ offset = k;
+
+ // Avoid default-constructing losers[].key
+ losers = static_cast<Loser*>(::operator new(2 * k * sizeof(Loser)));
+ for (unsigned int i = ik - 1; i < k; ++i)
+ losers[i + k].sup = true;
+
+ first_insert = true;
+ }
+
+ /**
+ * @brief The destructor.
+ */
+ ~LoserTreeBase()
+ { ::operator delete(losers); }
+
+ /**
+ * @brief Initializes the sequence "source" with the element "key".
+ *
+ * @param key the element to insert
+ * @param source index of the source sequence
+ * @param sup flag that determines whether the value to insert is an
+ * explicit supremum.
+ */
+ inline void
+ insert_start(const T& key, int source, bool sup)
+ {
+ unsigned int pos = k + source;
+
+ if(first_insert)
+ {
+ // Construct all keys, so we can easily deconstruct them.
+ for (unsigned int i = 0; i < (2 * k); ++i)
+ new(&(losers[i].key)) T(key);
+ first_insert = false;
+ }
+ else
+ new(&(losers[pos].key)) T(key);
+
+ losers[pos].sup = sup;
+ losers[pos].source = source;
+ }
+
+ /**
+ * @return the index of the sequence with the smallest element.
+ */
+ int get_min_source()
+ { return losers[0].source; }
+};
+
+/**
+ * @brief Stable LoserTree variant.
+ *
+ * Provides the stable implementations of insert_start, init_winner,
+ * init and delete_min_insert.
+ *
+ * Unstable variant is done using partial specialisation below.
+ */
+template<bool stable/* default == true */, typename T, typename Comparator>
+class LoserTree : public LoserTreeBase<T, Comparator>
+{
+ typedef LoserTreeBase<T, Comparator> Base;
+ using Base::k;
+ using Base::losers;
+ using Base::first_insert;
+
+public:
+ LoserTree(unsigned int _k, Comparator _comp)
+ : Base::LoserTreeBase(_k, _comp)
+ {}
+
+ unsigned int
+ init_winner(unsigned int root)
+ {
+ if (root >= k)
+ {
+ return root;
+ }
+ else
+ {
+ unsigned int left = init_winner (2 * root);
+ unsigned int right = init_winner (2 * root + 1);
+ if (losers[right].sup
+ || (!losers[left].sup
+ && !comp(losers[right].key, losers[left].key)))
+ {
+ // Left one is less or equal.
+ losers[root] = losers[right];
+ return left;
+ }
+ else
+ {
+ // Right one is less.
+ losers[root] = losers[left];
+ return right;
+ }
+ }
+ }
+
+ void init()
+ { losers[0] = losers[init_winner(1)]; }
+
+ /**
+ * @brief Delete the smallest element and insert a new element from
+ * the previously smallest element's sequence.
+ *
+ * This implementation is stable.
+ */
+ // Do not pass a const reference since key will be used as local variable.
+ void delete_min_insert(T key, bool sup)
+ {
+ int source = losers[0].source;
+ for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
+ {
+ // The smaller one gets promoted, ties are broken by source.
+ if ((sup && (!losers[pos].sup || losers[pos].source < source))
+ || (!sup && !losers[pos].sup
+ && ((comp(losers[pos].key, key))
+ || (!comp(key, losers[pos].key)
+ && losers[pos].source < source))))
+ {
+ // The other one is smaller.
+ std::swap(losers[pos].sup, sup);
+ std::swap(losers[pos].source, source);
+ std::swap(losers[pos].key, key);
+ }
+ }
+
+ losers[0].sup = sup;
+ losers[0].source = source;
+ losers[0].key = key;
+ }
+};
+
+/**
+ * @brief Unstable LoserTree variant.
+ *
+ * Stability (non-stable here) is selected with partial specialization.
+ */
+template<typename T, typename Comparator>
+class LoserTree</* stable == */false, T, Comparator> :
+ public LoserTreeBase<T, Comparator>
+{
+ typedef LoserTreeBase<T, Comparator> Base;
+ using Base::_M_log_k;
+ using Base::k;
+ using Base::losers;
+ using Base::first_insert;
+
+public:
+ LoserTree(unsigned int _k, Comparator _comp)
+ : Base::LoserTreeBase(_k, _comp)
+ {}
+
+ /**
+ * Computes the winner of the competition at position "root".
+ *
+ * Called recursively (starting at 0) to build the initial tree.
+ *
+ * @param root index of the "game" to start.
+ */
+ unsigned int
+ init_winner (unsigned int root)
+ {
+ if (root >= k)
+ {
+ return root;
+ }
+ else
+ {
+ unsigned int left = init_winner (2 * root);
+ unsigned int right = init_winner (2 * root + 1);
+ if (losers[right].sup ||
+ (!losers[left].sup
+ && !comp(losers[right].key, losers[left].key)))
+ {
+ // Left one is less or equal.
+ losers[root] = losers[right];
+ return left;
+ }
+ else
+ {
+ // Right one is less.
+ losers[root] = losers[left];
+ return right;
+ }
+ }
+ }
+
+ inline void
+ init()
+ { losers[0] = losers[init_winner(1)]; }
+
+ /**
+ * Delete the key smallest element and insert the element key instead.
+ *
+ * @param key the key to insert
+ * @param sup true iff key is an explicitly marked supremum
+ */
+ // Do not pass a const reference since key will be used as local variable.
+ inline void
+ delete_min_insert(T key, bool sup)
+ {
+#if _GLIBCXX_ASSERTIONS
+ // loser trees are only used for at least 2 sequences
+ _GLIBCXX_PARALLEL_ASSERT(_M_log_k > 1);
+#endif
- void
- insert_start(T key, int source, bool sup)
+ int source = losers[0].source;
+ for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
{
- bool inf = false;
- for (unsigned int pos = (offset + source) / 2; pos > 0; pos /= 2)
- {
- if ((!inf && !losers[pos].inf && !sup && !losers[pos].sup
- && comp(losers[pos].key, key)) || losers[pos].inf || sup)
- {
- // The other one is smaller.
- std::swap(losers[pos].key, key);
- std::swap(losers[pos].inf, inf);
- std::swap(losers[pos].sup, sup);
- std::swap(losers[pos].source, source);
- }
- }
-
- losers[0].key = key;
- losers[0].inf = inf;
- losers[0].sup = sup;
- losers[0].source = source;
+ // The smaller one gets promoted.
+ if (sup || (!losers[pos].sup && comp(losers[pos].key, key)))
+ {
+ // The other one is smaller.
+ std::swap(losers[pos].sup, sup);
+ std::swap(losers[pos].source, source);
+ std::swap(losers[pos].key, key);
+ }
}
- void
- init() { }
+ losers[0].sup = sup;
+ losers[0].source = source;
+ losers[0].key = key;
+ }
+};
- void
- delete_min_insert(T key, bool sup)
- {
- bool inf = false;
- int source = losers[0].source;
- for (unsigned int pos = (offset + source) / 2; pos > 0; pos /= 2)
- {
- // The smaller one gets promoted.
- if ((!inf && !losers[pos].inf && !sup && !losers[pos].sup
- && comp(losers[pos].key, key))
- || losers[pos].inf || sup)
- {
- // The other one is smaller.
- std::swap(losers[pos].key, key);
- std::swap(losers[pos].inf, inf);
- std::swap(losers[pos].sup, sup);
- std::swap(losers[pos].source, source);
- }
- }
-
- losers[0].key = key;
- losers[0].inf = inf;
- losers[0].sup = sup;
- losers[0].source = source;
- }
-
- void
- insert_start_stable(T key, int source, bool sup)
- {
- bool inf = false;
- for (unsigned int pos = (offset + source) / 2; pos > 0; pos /= 2)
- {
- if ((!inf && !losers[pos].inf && !sup && !losers[pos].sup
- && ((comp(losers[pos].key, key))
- || (!comp(key, losers[pos].key)
- && losers[pos].source < source)))
- || losers[pos].inf || sup)
- {
- // Take next key.
- std::swap(losers[pos].key, key);
- std::swap(losers[pos].inf, inf);
- std::swap(losers[pos].sup, sup);
- std::swap(losers[pos].source, source);
- }
- }
-
- losers[0].key = key;
- losers[0].inf = inf;
- losers[0].sup = sup;
- losers[0].source = source;
- }
- void
- init_stable() { }
-
- void
- delete_min_insert_stable(T key, bool sup)
- {
- bool inf = false;
- int source = losers[0].source;
- for (unsigned int pos = (offset + source) / 2; pos > 0; pos /= 2)
- {
- if ((!inf && !losers[pos].inf && !sup && !losers[pos].sup
- && ((comp(losers[pos].key, key))
- || (!comp(key, losers[pos].key)
- && losers[pos].source < source)))
- || losers[pos].inf || sup)
- {
- std::swap(losers[pos].key, key);
- std::swap(losers[pos].inf, inf);
- std::swap(losers[pos].sup, sup);
- std::swap(losers[pos].source, source);
- }
- }
-
- losers[0].key = key;
- losers[0].inf = inf;
- losers[0].sup = sup;
- losers[0].source = source;
- }
+/**
+ * @brief Base class of Loser Tree implementation using pointers.
+ */
+template<typename T, typename Comparator>
+class LoserTreePointerBase
+{
+protected:
+ /** @brief Internal representation of LoserTree elements. */
+ struct Loser
+ {
+ bool sup;
+ int source;
+ const T* keyp;
};
-#endif
-
-#if _GLIBCXX_LOSER_TREE
+ unsigned int ik, k, offset;
+ Loser* losers;
+ Comparator comp;
-/** @brief Guarded loser tree, either copying the whole element into
-* the tree structure, or looking up the element via the index.
-*
-* Guarding is done explicitly through one flag sup per element,
-* inf is not needed due to a better initialization routine. This
-* is a well-performing variant.
-*/
-template<typename T, typename Comparator = std::less<T> >
- class LoserTree
- {
- private:
- struct Loser
- {
- bool sup;
- int source;
- T key;
- };
-
- unsigned int ik, k, offset;
- Loser* losers;
- Comparator comp;
- bool first_insert;
-
- public:
- LoserTree(unsigned int _k, Comparator _comp = std::less<T>())
+public:
+ LoserTreePointerBase(unsigned int _k, Comparator _comp = std::less<T>())
: comp(_comp)
- {
- ik = _k;
-
- // Next greater power of 2.
- k = 1 << (log2(ik - 1) + 1);
- offset = k;
- // Avoid default-constructing losers[].key
- losers = static_cast<Loser*>(::operator new(2 * k * sizeof(Loser)));
- for (unsigned int i = ik - 1; i < k; ++i)
- losers[i + k].sup = true;
-
- first_insert = true;
- }
+ {
+ ik = _k;
- ~LoserTree()
- { ::operator delete(losers); }
+ // Next greater power of 2.
+ k = 1 << (log2(ik - 1) + 1);
+ offset = k;
+ losers = new Loser[k * 2];
+ for (unsigned int i = ik - 1; i < k; i++)
+ losers[i + k].sup = true;
+ }
- int
- get_min_source()
- { return losers[0].source; }
+ ~LoserTreePointerBase()
+ { ::operator delete(losers); }
- void
- insert_start(const T& key, int source, bool sup)
- {
- unsigned int pos = k + source;
-
- if(first_insert)
- {
- // Construct all keys, so we can easily deconstruct them.
- for (unsigned int i = 0; i < (2 * k); ++i)
- ::new(&(losers[i].key)) T(key);
- first_insert = false;
- }
- else
- ::new(&(losers[pos].key)) T(key);
-
- losers[pos].sup = sup;
- losers[pos].source = source;
- }
+ int get_min_source()
+ { return losers[0].source; }
- unsigned int
- init_winner (unsigned int root)
- {
- if (root >= k)
- {
- return root;
- }
- else
- {
- unsigned int left = init_winner (2 * root);
- unsigned int right = init_winner (2 * root + 1);
- if (losers[right].sup
- || (!losers[left].sup
- && !comp(losers[right].key, losers[left].key)))
- {
- // Left one is less or equal.
- losers[root] = losers[right];
- return left;
- }
- else
- {
- // Right one is less.
- losers[root] = losers[left];
- return right;
- }
- }
- }
+ void insert_start(const T& key, int source, bool sup)
+ {
+ unsigned int pos = k + source;
+
+ losers[pos].sup = sup;
+ losers[pos].source = source;
+ losers[pos].keyp = &key;
+ }
+};
+
+/**
+ * @brief Stable LoserTree implementation.
+ *
+ * The unstable variant is implemented using partial instantiation below.
+ */
+template<bool stable/* default == true */, typename T, typename Comparator>
+class LoserTreePointer : public LoserTreePointerBase<T, Comparator>
+{
+ typedef LoserTreePointerBase<T, Comparator> Base;
+ using Base::k;
+ using Base::losers;
- void
- init()
- { losers[0] = losers[init_winner(1)]; }
+public:
+ LoserTreePointer(unsigned int _k, Comparator _comp = std::less<T>())
+ : Base::LoserTreePointerBase(_k, _comp)
+ {}
- // Do not pass const reference since key will be used as local variable.
- void
- delete_min_insert(T key, bool sup)
- {
- int source = losers[0].source;
- for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
- {
- // The smaller one gets promoted.
- if (sup || (!losers[pos].sup && comp(losers[pos].key, key)))
- {
- // The other one is smaller.
- std::swap(losers[pos].sup, sup);
- std::swap(losers[pos].source, source);
- std::swap(losers[pos].key, key);
- }
- }
-
- losers[0].sup = sup;
- losers[0].source = source;
- losers[0].key = key;
- }
+ unsigned int
+ init_winner(unsigned int root)
+ {
+ if (root >= k)
+ {
+ return root;
+ }
+ else
+ {
+ unsigned int left = init_winner (2 * root);
+ unsigned int right = init_winner (2 * root + 1);
+ if (losers[right].sup
+ || (!losers[left].sup && !comp(*losers[right].keyp,
+ *losers[left].keyp)))
+ {
+ // Left one is less or equal.
+ losers[root] = losers[right];
+ return left;
+ }
+ else
+ {
+ // Right one is less.
+ losers[root] = losers[left];
+ return right;
+ }
+ }
+ }
+
+ void init()
+ { losers[0] = losers[init_winner(1)]; }
+
+ void delete_min_insert(const T& key, bool sup)
+ {
+ const T* keyp = &key;
+ int source = losers[0].source;
+ for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
+ {
+ // The smaller one gets promoted, ties are broken by source.
+ if ((sup && (!losers[pos].sup || losers[pos].source < source)) ||
+ (!sup && !losers[pos].sup &&
+ ((comp(*losers[pos].keyp, *keyp)) ||
+ (!comp(*keyp, *losers[pos].keyp)
+ && losers[pos].source < source))))
+ {
+ // The other one is smaller.
+ std::swap(losers[pos].sup, sup);
+ std::swap(losers[pos].source, source);
+ std::swap(losers[pos].keyp, keyp);
+ }
+ }
+
+ losers[0].sup = sup;
+ losers[0].source = source;
+ losers[0].keyp = keyp;
+ }
+};
+
+/**
+ * @brief Unstable LoserTree implementation.
+ *
+ * The stable variant is above.
+ */
+template<typename T, typename Comparator>
+class LoserTreePointer</* stable == */false, T, Comparator> :
+ public LoserTreePointerBase<T, Comparator>
+{
+ typedef LoserTreePointerBase<T, Comparator> Base;
+ using Base::k;
+ using Base::losers;
- void
- insert_start_stable(const T& key, int source, bool sup)
- { return insert_start(key, source, sup); }
+public:
+ LoserTreePointer(unsigned int _k, Comparator _comp = std::less<T>())
+ : Base::LoserTreePointerBase(_k, _comp)
+ {}
- unsigned int
- init_winner_stable (unsigned int root)
- {
- if (root >= k)
- {
- return root;
- }
- else
- {
- unsigned int left = init_winner (2 * root);
- unsigned int right = init_winner (2 * root + 1);
- if (losers[right].sup
+ unsigned int
+ init_winner(unsigned int root)
+ {
+ if (root >= k)
+ {
+ return root;
+ }
+ else
+ {
+ unsigned int left = init_winner (2 * root);
+ unsigned int right = init_winner (2 * root + 1);
+ if (losers[right].sup
|| (!losers[left].sup
- && !comp(losers[right].key, losers[left].key)))
- {
- // Left one is less or equal.
- losers[root] = losers[right];
- return left;
- }
- else
- {
- // Right one is less.
- losers[root] = losers[left];
- return right;
- }
- }
- }
-
- void
- init_stable()
- { losers[0] = losers[init_winner_stable(1)]; }
-
- // Do not pass const reference since key will be used as local variable.
- void
- delete_min_insert_stable(T key, bool sup)
- {
- int source = losers[0].source;
- for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
- {
- // The smaller one gets promoted, ties are broken by source.
- if ( (sup && (!losers[pos].sup || losers[pos].source < source))
- || (!sup && !losers[pos].sup
- && ((comp(losers[pos].key, key))
- || (!comp(key, losers[pos].key)
- && losers[pos].source < source))))
- {
- // The other one is smaller.
- std::swap(losers[pos].sup, sup);
- std::swap(losers[pos].source, source);
- std::swap(losers[pos].key, key);
- }
- }
-
- losers[0].sup = sup;
- losers[0].source = source;
- losers[0].key = key;
- }
- };
-
-#endif
-
-#if _GLIBCXX_LOSER_TREE_REFERENCE
-
-/** @brief Guarded loser tree, either copying the whole element into
-* the tree structure, or looking up the element via the index.
-*
-* Guarding is done explicitly through one flag sup per element,
-* inf is not needed due to a better initialization routine. This
-* is a well-performing variant.
-*/
-template<typename T, typename Comparator = std::less<T> >
- class LoserTreeReference
+ && !comp(*losers[right].keyp, *losers[left].keyp)))
+ {
+ // Left one is less or equal.
+ losers[root] = losers[right];
+ return left;
+ }
+ else
+ {
+ // Right one is less.
+ losers[root] = losers[left];
+ return right;
+ }
+ }
+ }
+
+ void init()
+ { losers[0] = losers[init_winner(1)]; }
+
+ void delete_min_insert(const T& key, bool sup)
{
-#undef COPY
-#ifdef COPY
-#define KEY(i) losers[i].key
-#define KEY_SOURCE(i) key
-#else
-#define KEY(i) keys[losers[i].source]
-#define KEY_SOURCE(i) keys[i]
-#endif
- private:
- struct Loser
- {
- bool sup;
- int source;
-#ifdef COPY
- T key;
-#endif
- };
+ const T* keyp = &key;
+ int source = losers[0].source;
+ for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
+ {
+ // The smaller one gets promoted.
+ if (sup || (!losers[pos].sup && comp(*losers[pos].keyp, *keyp)))
+ {
+ // The other one is smaller.
+ std::swap(losers[pos].sup, sup);
+ std::swap(losers[pos].source, source);
+ std::swap(losers[pos].keyp, keyp);
+ }
+ }
+
+ losers[0].sup = sup;
+ losers[0].source = source;
+ losers[0].keyp = keyp;
+ }
+};
+
+/** @brief Base class for unguarded LoserTree implementation.
+ *
+ * The whole element is copied into the tree structure.
+ *
+ * No guarding is done, therefore not a single input sequence must
+ * run empty. Unused sequence heads are marked with a sentinel which
+ * is &gt; all elements that are to be merged.
+ *
+ * This is a very fast variant.
+ */
+template<typename T, typename Comparator>
+class LoserTreeUnguardedBase
+{
+protected:
+ struct Loser
+ {
+ int source;
+ T key;
+ };
- unsigned int ik, k, offset;
- Loser* losers;
-#ifndef COPY
- T* keys;
-#endif
- Comparator comp;
+ unsigned int ik, k, offset;
+ Loser* losers;
+ Comparator comp;
- public:
- LoserTreeReference(unsigned int _k, Comparator _comp = std::less<T>())
+public:
+ inline
+ LoserTreeUnguardedBase(unsigned int _k, const T _sentinel,
+ Comparator _comp = std::less<T>())
: comp(_comp)
- {
- ik = _k;
-
- // Next greater power of 2.
- k = 1 << (log2(ik - 1) + 1);
- offset = k;
- losers = new Loser[k * 2];
-#ifndef COPY
- keys = new T[ik];
-#endif
- for (unsigned int i = ik - 1; i < k; ++i)
- losers[i + k].sup = true;
- }
-
- ~LoserTreeReference()
- {
- delete[] losers;
-#ifndef COPY
- delete[] keys;
-#endif
- }
-
- int
- get_min_source()
- { return losers[0].source; }
-
- void
- insert_start(T key, int source, bool sup)
- {
- unsigned int pos = k + source;
-
- losers[pos].sup = sup;
- losers[pos].source = source;
- KEY(pos) = key;
- }
-
- unsigned int
- init_winner(unsigned int root)
- {
- if (root >= k)
- {
- return root;
- }
- else
- {
- unsigned int left = init_winner (2 * root);
- unsigned int right = init_winner (2 * root + 1);
- if ( losers[right].sup ||
- (!losers[left].sup && !comp(KEY(right), KEY(left))))
- {
- // Left one is less or equal.
- losers[root] = losers[right];
- return left;
- }
- else
- {
- // Right one is less.
- losers[root] = losers[left];
- return right;
- }
- }
- }
-
- void
- init()
- {
- losers[0] = losers[init_winner(1)];
- }
-
- void
- delete_min_insert(T key, bool sup)
- {
- int source = losers[0].source;
- for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
- {
- // The smaller one gets promoted.
- if (sup || (!losers[pos].sup && comp(KEY(pos), KEY_SOURCE(source))))
- {
- // The other one is smaller.
- std::swap(losers[pos].sup, sup);
- std::swap(losers[pos].source, source);
-#ifdef COPY
- std::swap(KEY(pos), KEY_SOURCE(source));
-#endif
- }
- }
-
- losers[0].sup = sup;
- losers[0].source = source;
-#ifdef COPY
- KEY(0) = KEY_SOURCE(source);
+ {
+ ik = _k;
+
+ // Next greater power of 2.
+ k = 1 << (log2(ik - 1) + 1);
+ offset = k;
+ // Avoid default-constructing losers[].key
+ losers = static_cast<Loser*>(::operator new(2 * k * sizeof(Loser)));
+
+ for (unsigned int i = /*k + ik - 1*/0; i < (2 * k); ++i)
+ {
+ losers[i].key = _sentinel;
+ losers[i].source = -1;
+ }
+ }
+
+ inline ~LoserTreeUnguardedBase()
+ { ::operator delete(losers); }
+
+ inline int
+ get_min_source()
+ {
+ // no dummy sequence can ever be at the top!
+#if _GLIBCXX_ASSERTIONS
+ _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1);
#endif
- }
+ return losers[0].source;
+ }
- void
- insert_start_stable(T key, int source, bool sup)
- { return insert_start(key, source, sup); }
-
- unsigned int
- init_winner_stable(unsigned int root)
- {
- if (root >= k)
- {
- return root;
- }
- else
- {
- unsigned int left = init_winner (2 * root);
- unsigned int right = init_winner (2 * root + 1);
- if (losers[right].sup
- || (!losers[left].sup && !comp(KEY(right), KEY(left))))
- {
- // Left one is less or equal.
- losers[root] = losers[right];
- return left;
- }
- else
- {
- // Right one is less.
- losers[root] = losers[left];
- return right;
- }
- }
- }
-
- void
- init_stable()
- { losers[0] = losers[init_winner_stable(1)]; }
-
- void
- delete_min_insert_stable(T key, bool sup)
- {
- int source = losers[0].source;
- for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
- {
- // The smaller one gets promoted, ties are broken by source.
- if ((sup && (!losers[pos].sup || losers[pos].source < source))
- || (!sup && !losers[pos].sup
- && ((comp(KEY(pos), KEY_SOURCE(source)))
- || (!comp(KEY_SOURCE(source), KEY(pos))
- && losers[pos].source < source))))
- {
- // The other one is smaller.
- std::swap(losers[pos].sup, sup);
- std::swap(losers[pos].source, source);
-#ifdef COPY
- std::swap(KEY(pos), KEY_SOURCE(source));
-#endif
- }
- }
+ inline void
+ insert_start(const T& key, int source, bool)
+ {
+ unsigned int pos = k + source;
+
+ new(&(losers[pos].key)) T(key);
+ losers[pos].source = source;
+ }
+};
+
+/**
+ * @brief Stable implementation of unguarded LoserTree.
+ *
+ * Unstable variant is selected below with partial specialization.
+ */
+template<bool stable/* default == true */, typename T, typename Comparator>
+class LoserTreeUnguarded : public LoserTreeUnguardedBase<T, Comparator>
+{
+ typedef LoserTreeUnguardedBase<T, Comparator> Base;
+ using Base::k;
+ using Base::losers;
+
+public:
+ LoserTreeUnguarded(unsigned int _k, const T _sentinel,
+ Comparator _comp = std::less<T>())
+ : Base::LoserTreeUnguardedBase(_k, _sentinel, _comp)
+ {}
+
+ unsigned int
+ init_winner(unsigned int root)
+ {
+ if (root >= k)
+ {
+ return root;
+ }
+ else
+ {
+ unsigned int left = init_winner (2 * root);
+ unsigned int right = init_winner (2 * root + 1);
+ if (!comp(losers[right].key, losers[left].key))
+ {
+ // Left one is less or equal.
+ losers[root] = losers[right];
+ return left;
+ }
+ else
+ {
+ // Right one is less.
+ losers[root] = losers[left];
+ return right;
+ }
+ }
+ }
+
+ inline void
+ init()
+ {
+ losers[0] = losers[init_winner(1)];
- losers[0].sup = sup;
- losers[0].source = source;
-#ifdef COPY
- KEY(0) = KEY_SOURCE(source);
+ // no dummy sequence can ever be at the top at the beginning (0 sequences!)
+#if _GLIBCXX_ASSERTIONS
+ _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1);
#endif
- }
- };
-#undef KEY
-#undef KEY_SOURCE
+ }
+ // Do not pass a const reference since key will be used as local variable.
+ inline void
+ delete_min_insert(T key, bool)
+ {
+ // No dummy sequence can ever be at the top and be retrieved!
+#if _GLIBCXX_ASSERTIONS
+ _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1);
#endif
-#if _GLIBCXX_LOSER_TREE_POINTER
-
-/** @brief Guarded loser tree, either copying the whole element into
- the tree structure, or looking up the element via the index.
-* Guarding is done explicitly through one flag sup per element,
-* inf is not needed due to a better initialization routine.
-* This is a well-performing variant.
-*/
-template<typename T, typename Comparator = std::less<T> >
- class LoserTreePointer
+ int source = losers[0].source;
+ printf("%d\n", source);
+ for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
+ {
+ // The smaller one gets promoted, ties are broken by source.
+ if (comp(losers[pos].key, key)
+ || (!comp(key, losers[pos].key) && losers[pos].source < source))
+ {
+ // The other one is smaller.
+ std::swap(losers[pos].source, source);
+ std::swap(losers[pos].key, key);
+ }
+ }
+
+ losers[0].source = source;
+ losers[0].key = key;
+ }
+};
+
+/**
+ * @brief Non-Stable implementation of unguarded LoserTree.
+ *
+ * Stable implementation is above.
+ */
+template<typename T, typename Comparator>
+class LoserTreeUnguarded</* stable == */false, T, Comparator> :
+ public LoserTreeUnguardedBase<T, Comparator>
+{
+ typedef LoserTreeUnguardedBase<T, Comparator> Base;
+ using Base::k;
+ using Base::losers;
+
+public:
+ LoserTreeUnguarded(unsigned int _k, const T _sentinel,
+ Comparator _comp = std::less<T>())
+ : Base::LoserTreeUnguardedBase(_k, _sentinel, _comp)
+ {}
+
+ unsigned int
+ init_winner (unsigned int root)
{
- private:
- struct Loser
- {
- bool sup;
- int source;
- const T* keyp;
- };
-
- unsigned int ik, k, offset;
- Loser* losers;
- Comparator comp;
-
- public:
- LoserTreePointer(unsigned int _k, Comparator _comp = std::less<T>())
- : comp(_comp)
- {
- ik = _k;
-
- // Next greater power of 2.
- k = 1 << (log2(ik - 1) + 1);
- offset = k;
- losers = new Loser[k * 2];
- for (unsigned int i = ik - 1; i < k; ++i)
- losers[i + k].sup = true;
- }
-
- ~LoserTreePointer()
- { delete[] losers; }
-
- int
- get_min_source()
- { return losers[0].source; }
-
- void
- insert_start(const T& key, int source, bool sup)
- {
- unsigned int pos = k + source;
-
- losers[pos].sup = sup;
- losers[pos].source = source;
- losers[pos].keyp = &key;
- }
-
- unsigned int
- init_winner(unsigned int root)
- {
- if (root >= k)
- return root;
- else
- {
- unsigned int left = init_winner (2 * root);
- unsigned int right = init_winner (2 * root + 1);
- if (losers[right].sup
- || (!losers[left].sup
- && !comp(*losers[right].keyp, *losers[left].keyp)))
- {
- // Left one is less or equal.
- losers[root] = losers[right];
- return left;
- }
- else
- {
- // Right one is less.
- losers[root] = losers[left];
- return right;
- }
- }
- }
-
- void
- init()
- { losers[0] = losers[init_winner(1)]; }
-
- void
- delete_min_insert(const T& key, bool sup)
- {
- const T* keyp = &key;
- int source = losers[0].source;
- for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
- {
- // The smaller one gets promoted.
- if (sup || (!losers[pos].sup && comp(*losers[pos].keyp, *keyp)))
- {
- // The other one is smaller.
- std::swap(losers[pos].sup, sup);
- std::swap(losers[pos].source, source);
- std::swap(losers[pos].keyp, keyp);
- }
- }
-
- losers[0].sup = sup;
- losers[0].source = source;
- losers[0].keyp = keyp;
- }
-
- void
- insert_start_stable(const T& key, int source, bool sup)
- { return insert_start(key, source, sup); }
-
- unsigned int
- init_winner_stable(unsigned int root)
- {
- if (root >= k)
- {
- return root;
- }
- else
- {
- unsigned int left = init_winner (2 * root);
- unsigned int right = init_winner (2 * root + 1);
- if (losers[right].sup
- || (!losers[left].sup && !comp(*losers[right].keyp,
- *losers[left].keyp)))
- {
- // Left one is less or equal.
- losers[root] = losers[right];
- return left;
- }
- else
- {
- // Right one is less.
- losers[root] = losers[left];
- return right;
- }
- }
- }
-
- void
- init_stable()
- { losers[0] = losers[init_winner_stable(1)]; }
-
- void
- delete_min_insert_stable(const T& key, bool sup)
- {
- const T* keyp = &key;
- int source = losers[0].source;
- for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
- {
- // The smaller one gets promoted, ties are broken by source.
- if ( (sup && (!losers[pos].sup || losers[pos].source < source))
- || (!sup && !losers[pos].sup &&
- ((comp(*losers[pos].keyp, *keyp))
- || (!comp(*keyp, *losers[pos].keyp)
- && losers[pos].source < source))))
- {
- // The other one is smaller.
- std::swap(losers[pos].sup, sup);
- std::swap(losers[pos].source, source);
- std::swap(losers[pos].keyp, keyp);
- }
- }
-
- losers[0].sup = sup;
- losers[0].source = source;
- losers[0].keyp = keyp;
- }
- };
-
+ if (root >= k)
+ {
+ return root;
+ }
+ else
+ {
+ unsigned int left = init_winner (2 * root);
+ unsigned int right = init_winner (2 * root + 1);
+
+#if _GLIBCXX_ASSERTIONS
+ // If left one is sentinel then right one must be, too.
+ if (losers[left].source == -1)
+ _GLIBCXX_PARALLEL_ASSERT(losers[right].source == -1);
#endif
-#if _GLIBCXX_LOSER_TREE_UNGUARDED
-
-/** @brief Unguarded loser tree, copying the whole element into the
-* tree structure.
-*
-* No guarding is done, therefore not a single input sequence must
-* run empty. This is a very fast variant.
-*/
-template<typename T, typename Comparator = std::less<T> >
- class LoserTreeUnguarded
+ if (!comp(losers[right].key, losers[left].key))
+ {
+ // Left one is less or equal.
+ losers[root] = losers[right];
+ return left;
+ }
+ else
+ {
+ // Right one is less.
+ losers[root] = losers[left];
+ return right;
+ }
+ }
+ }
+
+ inline void
+ init()
{
- private:
- struct Loser
- {
- int source;
- T key;
- };
-
- unsigned int ik, k, offset;
- unsigned int* mapping;
- Loser* losers;
- Comparator comp;
-
- void
- map(unsigned int root, unsigned int begin, unsigned int end)
- {
- if (begin + 1 == end)
- mapping[begin] = root;
- else
- {
- // Next greater or equal power of 2.
- unsigned int left = 1 << (log2(end - begin - 1));
- map(root * 2, begin, begin + left);
- map(root * 2 + 1, begin + left, end);
- }
- }
-
- public:
- LoserTreeUnguarded(unsigned int _k, Comparator _comp = std::less<T>())
- : comp(_comp)
- {
- ik = _k;
- // Next greater or equal power of 2.
- k = 1 << (log2(ik - 1) + 1);
- offset = k;
- losers = new Loser[k + ik];
- mapping = new unsigned int[ik];
- map(1, 0, ik);
- }
-
- ~LoserTreeUnguarded()
- {
- delete[] losers;
- delete[] mapping;
- }
-
- int
- get_min_source()
- { return losers[0].source; }
-
- void
- insert_start(const T& key, int source, bool)
- {
- unsigned int pos = mapping[source];
- losers[pos].source = source;
- losers[pos].key = key;
- }
-
- unsigned int
- init_winner(unsigned int root, unsigned int begin, unsigned int end)
- {
- if (begin + 1 == end)
- return mapping[begin];
- else
- {
- // Next greater or equal power of 2.
- unsigned int division = 1 << (log2(end - begin - 1));
- unsigned int left = init_winner(2 * root, begin, begin + division);
- unsigned int right =
- init_winner(2 * root + 1, begin + division, end);
- if (!comp(losers[right].key, losers[left].key))
- {
- // Left one is less or equal.
- losers[root] = losers[right];
- return left;
- }
- else
- {
- // Right one is less.
- losers[root] = losers[left];
- return right;
- }
- }
- }
-
- void
- init()
- { losers[0] = losers[init_winner(1, 0, ik)]; }
-
- // Do not pass const reference since key will be used as local variable.
- void
- delete_min_insert(const T& key, bool)
- {
- losers[0].key = key;
- T& keyr = losers[0].key;
- int& source = losers[0].source;
- for (int pos = mapping[source] / 2; pos > 0; pos /= 2)
- {
- // The smaller one gets promoted.
- if (comp(losers[pos].key, keyr))
- {
- // The other one is smaller.
- std::swap(losers[pos].source, source);
- std::swap(losers[pos].key, keyr);
- }
- }
- }
-
- void
- insert_start_stable(const T& key, int source, bool)
- { return insert_start(key, source, false); }
-
- void
- init_stable()
- { init(); }
-
- void
- delete_min_insert_stable(const T& key, bool)
- {
- losers[0].key = key;
- T& keyr = losers[0].key;
- int& source = losers[0].source;
- for (int pos = mapping[source] / 2; pos > 0; pos /= 2)
- {
- // The smaller one gets promoted, ties are broken by source.
- if (comp(losers[pos].key, keyr)
- || (!comp(keyr, losers[pos].key)
- && losers[pos].source < source))
- {
- // The other one is smaller.
- std::swap(losers[pos].source, source);
- std::swap(losers[pos].key, keyr);
- }
- }
- }
- };
+ losers[0] = losers[init_winner(1)];
+ // no dummy sequence can ever be at the top at the beginning (0 sequences!)
+#if _GLIBCXX_ASSERTIONS
+ _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1);
#endif
+ }
-#if _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED
+ // Do not pass a const reference since key will be used as local variable.
+ inline void
+ delete_min_insert(T key, bool)
+ {
+ printf("wrong\n");
+ int source = losers[0].source;
+ for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
+ {
+ // The smaller one gets promoted.
+ if (comp(losers[pos].key, key))
+ {
+ // The other one is smaller.
+ std::swap(losers[pos].source, source);
+ std::swap(losers[pos].key, key);
+ }
+ }
+
+ losers[0].source = source;
+ losers[0].key = key;
+ }
+};
/** @brief Unguarded loser tree, keeping only pointers to the
* elements in the tree structure.
@@ -926,175 +773,233 @@ template<typename T, typename Comparator = std::less<T> >
* No guarding is done, therefore not a single input sequence must
* run empty. This is a very fast variant.
*/
-template<typename T, typename Comparator = std::less<T> >
- class LoserTreePointerUnguarded
+template<typename T, typename Comparator>
+class LoserTreePointerUnguardedBase
+{
+protected:
+ struct Loser
{
- private:
- struct Loser
- {
- int source;
- const T* keyp;
- };
-
- unsigned int ik, k, offset;
- unsigned int* mapping;
- Loser* losers;
- Comparator comp;
-
- void map(unsigned int root, unsigned int begin, unsigned int end)
- {
- if (begin + 1 == end)
- mapping[begin] = root;
- else
- {
- // Next greater or equal power of 2.
- unsigned int left = 1 << (log2(end - begin - 1));
- map(root * 2, begin, begin + left);
- map(root * 2 + 1, begin + left, end);
- }
- }
-
- public:
- LoserTreePointerUnguarded(unsigned int _k,
- Comparator _comp = std::less<T>())
- : comp(_comp)
- {
- ik = _k;
-
- // Next greater power of 2.
- k = 1 << (log2(ik - 1) + 1);
- offset = k;
- losers = new Loser[k + ik];
- mapping = new unsigned int[ik];
- map(1, 0, ik);
- }
-
- ~LoserTreePointerUnguarded()
- {
- delete[] losers;
- delete[] mapping;
- }
-
- int
- get_min_source()
- { return losers[0].source; }
-
- void
- insert_start(const T& key, int source, bool)
- {
- unsigned int pos = mapping[source];
- losers[pos].source = source;
- losers[pos].keyp = &key;
- }
-
- unsigned int
- init_winner(unsigned int root, unsigned int begin, unsigned int end)
- {
- if (begin + 1 == end)
- return mapping[begin];
- else
- {
- // Next greater or equal power of 2.
- unsigned int division = 1 << (log2(end - begin - 1));
- unsigned int left = init_winner(2 * root, begin, begin + division);
- unsigned int right = init_winner(2 * root + 1,
- begin + division, end);
- if (!comp(*losers[right].keyp, *losers[left].keyp))
- {
- // Left one is less or equal.
- losers[root] = losers[right];
- return left;
- }
- else
- {
- // Right one is less.
- losers[root] = losers[left];
- return right;
- }
- }
- }
+ int source;
+ const T* keyp;
+ };
- void
- init()
- { losers[0] = losers[init_winner(1, 0, ik)]; }
+ unsigned int ik, k, offset;
+ Loser* losers;
+ const T sentinel;
+ Comparator comp;
- void
- delete_min_insert(const T& key, bool)
- {
- const T* keyp = &key;
- int& source = losers[0].source;
- for (int pos = mapping[source] / 2; pos > 0; pos /= 2)
- {
- // The smaller one gets promoted.
- if (comp(*losers[pos].keyp, *keyp))
- {
- // The other one is smaller.
- std::swap(losers[pos].source, source);
- std::swap(losers[pos].keyp, keyp);
- }
- }
-
- losers[0].keyp = keyp;
- }
+public:
- void
- insert_start_stable(const T& key, int source, bool)
- { return insert_start(key, source, false); }
+ inline
+ LoserTreePointerUnguardedBase(unsigned int _k, const T _sentinel,
+ Comparator _comp = std::less<T>())
+ : sentinel(_sentinel), comp(_comp)
+ {
+ ik = _k;
+
+ // Next greater power of 2.
+ k = 1 << (log2(ik - 1) + 1);
+ offset = k;
+ // Avoid default-constructing losers[].key
+ losers = new Loser[2 * k];
+
+ for (unsigned int i = /*k + ik - 1*/0; i < (2 * k); ++i)
+ {
+ losers[i].keyp = &sentinel;
+ losers[i].source = -1;
+ }
+ }
+
+ inline ~LoserTreePointerUnguardedBase()
+ { delete[] losers; }
+
+ inline int
+ get_min_source()
+ {
+ // no dummy sequence can ever be at the top!
+#if _GLIBCXX_ASSERTIONS
+ _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1);
+#endif
+ return losers[0].source;
+ }
- void
- init_stable()
- { init(); }
+ inline void
+ insert_start(const T& key, int source, bool)
+ {
+ unsigned int pos = k + source;
+
+ losers[pos].keyp = &key;
+ losers[pos].source = source;
+ }
+};
+
+/**
+ * @brief Stable unguarded LoserTree variant storing pointers.
+ *
+ * Unstable variant is implemented below using partial specialization.
+ */
+template<bool stable/* default == true */, typename T, typename Comparator>
+class LoserTreePointerUnguarded :
+ public LoserTreePointerUnguardedBase<T, Comparator>
+{
+ typedef LoserTreePointerUnguardedBase<T, Comparator> Base;
+ using Base::k;
+ using Base::losers;
+
+public:
+ LoserTreePointerUnguarded(unsigned int _k, const T _sentinel,
+ Comparator _comp = std::less<T>())
+ : Base::LoserTreePointerUnguardedBase(_k, _sentinel, _comp)
+ {}
+
+ unsigned int
+ init_winner(unsigned int root)
+ {
+ if (root >= k)
+ {
+ return root;
+ }
+ else
+ {
+ unsigned int left = init_winner (2 * root);
+ unsigned int right = init_winner (2 * root + 1);
+ if (!comp(*losers[right].keyp, *losers[left].keyp))
+ {
+ // Left one is less or equal.
+ losers[root] = losers[right];
+ return left;
+ }
+ else
+ {
+ // Right one is less.
+ losers[root] = losers[left];
+ return right;
+ }
+ }
+ }
+
+ inline void
+ init()
+ {
+ losers[0] = losers[init_winner(1)];
- void
- delete_min_insert_stable(const T& key, bool)
- {
- int& source = losers[0].source;
- const T* keyp = &key;
- for (int pos = mapping[source] / 2; pos > 0; pos /= 2)
- {
- // The smaller one gets promoted, ties are broken by source.
- if (comp(*losers[pos].keyp, *keyp)
- || (!comp(*keyp, *losers[pos].keyp)
- && losers[pos].source < source))
- {
- // The other one is smaller.
- std::swap(losers[pos].source, source);
- std::swap(losers[pos].keyp, keyp);
- }
- }
- losers[0].keyp = keyp;
- }
- };
+ // no dummy sequence can ever be at the top at the beginning (0 sequences!)
+#if _GLIBCXX_ASSERTIONS
+ _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1);
#endif
+ }
-template<typename _ValueTp, class Comparator>
- struct loser_tree_traits
+ inline void
+ delete_min_insert(const T& key, bool sup)
{
-#if _GLIBCXX_LOSER_TREE
- typedef LoserTree<_ValueTp, Comparator> LT;
-#else
-# if _GLIBCXX_LOSER_TREE_POINTER
- typedef LoserTreePointer<_ValueTp, Comparator> LT;
-# else
-# error Must define some type in losertree.h.
-# endif
+ const T* keyp = &key;
+ int source = losers[0].source;
+ for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
+ {
+ // The smaller one gets promoted, ties are broken by source.
+ if (comp(*losers[pos].keyp, *keyp)
+ || (!comp(*keyp, *losers[pos].keyp) && losers[pos].source < source))
+ {
+ // The other one is smaller.
+ std::swap(losers[pos].source, source);
+ std::swap(losers[pos].keyp, keyp);
+ }
+ }
+
+ losers[0].source = source;
+ losers[0].keyp = keyp;
+
+ // no dummy sequence can ever be at the top!
+#if _GLIBCXX_ASSERTIONS
+ _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1);
+#endif
+ }
+};
+
+/**
+ * @brief Unstable unguarded LoserTree variant storing pointers.
+ *
+ * Stable variant is above.
+ */
+template<typename T, typename Comparator>
+class LoserTreePointerUnguarded</* stable == */false, T, Comparator> :
+ public LoserTreePointerUnguardedBase<T, Comparator>
+{
+ typedef LoserTreePointerUnguardedBase<T, Comparator> Base;
+ using Base::k;
+ using Base::losers;
+
+public:
+ LoserTreePointerUnguarded(unsigned int _k, const T _sentinel,
+ Comparator _comp = std::less<T>())
+ : Base::LoserTreePointerUnguardedBase(_k, _sentinel, _comp)
+ {}
+
+ unsigned int
+ init_winner(unsigned int root)
+ {
+ if (root >= k)
+ {
+ return root;
+ }
+ else
+ {
+ unsigned int left = init_winner (2 * root);
+ unsigned int right = init_winner (2 * root + 1);
+
+#if _GLIBCXX_ASSERTIONS
+ // If left one is sentinel then right one must be, too.
+ if (losers[left].source == -1)
+ _GLIBCXX_PARALLEL_ASSERT(losers[right].source == -1);
#endif
- };
-template<typename _ValueTp, class Comparator>
- struct loser_tree_unguarded_traits
+ if (!comp(*losers[right].keyp, *losers[left].keyp))
+ {
+ // Left one is less or equal.
+ losers[root] = losers[right];
+ return left;
+ }
+ else
+ {
+ // Right one is less.
+ losers[root] = losers[left];
+ return right;
+ }
+ }
+ }
+
+ inline void
+ init()
{
-#if _GLIBCXX_LOSER_TREE_UNGUARDED
- typedef LoserTreeUnguarded<_ValueTp, Comparator> LT;
-#else
-# if _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED
- typedef LoserTreePointerUnguarded<_ValueTp, Comparator> LT;
-# else
-# error Must define some unguarded type in losertree.h.
-# endif
+ losers[0] = losers[init_winner(1)];
+
+ // no dummy sequence can ever be at the top at the beginning (0 sequences!)
+#if _GLIBCXX_ASSERTIONS
+ _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1);
#endif
- };
+ }
-}
+ inline void
+ delete_min_insert(const T& key, bool sup)
+ {
+ const T* keyp = &key;
+ int source = losers[0].source;
+ for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
+ {
+ // The smaller one gets promoted.
+ if (comp(*(losers[pos].keyp), *keyp))
+ {
+ // The other one is smaller.
+ std::swap(losers[pos].source, source);
+ std::swap(losers[pos].keyp, keyp);
+ }
+ }
+
+ losers[0].source = source;
+ losers[0].keyp = keyp;
+ }
+};
+
+} // namespace __gnu_parallel
#endif
diff --git a/libstdc++-v3/include/parallel/merge.h b/libstdc++-v3/include/parallel/merge.h
index f12f311..6e0f2e3 100644
--- a/libstdc++-v3/include/parallel/merge.h
+++ b/libstdc++-v3/include/parallel/merge.h
@@ -239,19 +239,26 @@ namespace __gnu_parallel
std::iterator_traits<RandomAccessIterator1>::
difference_type max_length, Comparator comp)
{
- typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
- value_type;
+ typedef typename
+ std::iterator_traits<RandomAccessIterator1>::value_type value_type;
typedef typename std::iterator_traits<RandomAccessIterator1>::
difference_type difference_type1 /* == difference_type2 */;
typedef typename std::iterator_traits<RandomAccessIterator3>::
difference_type difference_type3;
+ typedef typename std::pair<RandomAccessIterator1, RandomAccessIterator1>
+ iterator_pair;
std::pair<RandomAccessIterator1, RandomAccessIterator1>
seqs[2] = { std::make_pair(begin1, end1),
std::make_pair(begin2, end2) };
- RandomAccessIterator3
- target_end = parallel_multiway_merge(seqs, seqs + 2, target,
- comp, max_length, true, false);
+ RandomAccessIterator3
+ target_end = parallel_multiway_merge
+ < /* stable = */ true, /* sentinels = */ false>(
+ seqs, seqs + 2, target, comp,
+ multiway_merge_exact_splitting
+ < /* stable = */ true, iterator_pair*,
+ Comparator, difference_type1>,
+ max_length);
return target_end;
}
diff --git a/libstdc++-v3/include/parallel/multiway_merge.h b/libstdc++-v3/include/parallel/multiway_merge.h
index 6cc724b..40a2f1b 100644
--- a/libstdc++-v3/include/parallel/multiway_merge.h
+++ b/libstdc++-v3/include/parallel/multiway_merge.h
@@ -40,7 +40,7 @@
* This file is a GNU parallel extension to the Standard C++ Library.
*/
-// Written by Johannes Singler.
+// Written by Johannes Singler and Manuel Holtgrewe.
#ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGE_H
#define _GLIBCXX_PARALLEL_MULTIWAY_MERGE_H
@@ -50,7 +50,6 @@
#include <bits/stl_algo.h>
#include <parallel/features.h>
#include <parallel/parallel.h>
-#include <parallel/merge.h>
#include <parallel/losertree.h>
#if _GLIBCXX_ASSERTIONS
#include <parallel/checkers.h>
@@ -59,27 +58,34 @@
/** @brief Length of a sequence described by a pair of iterators. */
#define _GLIBCXX_PARALLEL_LENGTH(s) ((s).second - (s).first)
-// XXX need iterator typedefs
namespace __gnu_parallel
{
+
+// Announce guarded and unguarded iterator.
+
template<typename RandomAccessIterator, typename Comparator>
class guarded_iterator;
+// Making the arguments const references seems to dangerous,
+// the user-defined comparator might not be const.
template<typename RandomAccessIterator, typename Comparator>
inline bool
operator<(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
- guarded_iterator<RandomAccessIterator, Comparator>& bi2);
+ guarded_iterator<RandomAccessIterator, Comparator>& bi2);
template<typename RandomAccessIterator, typename Comparator>
inline bool
operator<=(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
- guarded_iterator<RandomAccessIterator, Comparator>& bi2);
+ guarded_iterator<RandomAccessIterator, Comparator>& bi2);
- /** @brief Iterator wrapper supporting an implicit supremum at the end
- of the sequence, dominating all comparisons.
- * Deriving from RandomAccessIterator is not possible since
- * RandomAccessIterator need not be a class.
- */
+/** @brief Iterator wrapper supporting an implicit supremum at the end
+ * of the sequence, dominating all comparisons.
+ *
+ * The implicit supremum comes with a performance cost.
+ *
+ * Deriving from RandomAccessIterator is not possible since
+ * RandomAccessIterator need not be a class.
+ */
template<typename RandomAccessIterator, typename Comparator>
class guarded_iterator
{
@@ -100,7 +106,7 @@ template<typename RandomAccessIterator, typename Comparator>
* @param comp Comparator provided for associated overloaded
* compare operators. */
guarded_iterator(RandomAccessIterator begin,
- RandomAccessIterator end, Comparator& comp)
+ RandomAccessIterator end, Comparator& comp)
: current(begin), end(end), comp(comp)
{ }
@@ -115,7 +121,7 @@ template<typename RandomAccessIterator, typename Comparator>
/** @brief Dereference operator.
* @return Referenced element. */
- typename std::iterator_traits<RandomAccessIterator>::value_type
+ typename std::iterator_traits<RandomAccessIterator>::value_type&
operator*()
{ return *current; }
@@ -158,7 +164,7 @@ template<typename RandomAccessIterator, typename Comparator>
template<typename RandomAccessIterator, typename Comparator>
inline bool
operator<=(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
- guarded_iterator<RandomAccessIterator, Comparator>& bi2)
+ guarded_iterator<RandomAccessIterator, Comparator>& bi2)
{
if (bi2.current == bi2.end) //bi1 is sup
return bi1.current != bi1.end; //bi2 is not sup
@@ -185,7 +191,7 @@ template<typename RandomAccessIterator, typename Comparator>
{
private:
/** @brief Current iterator position. */
- RandomAccessIterator& current;
+ RandomAccessIterator current;
/** @brief Comparator. */
mutable Comparator& comp;
@@ -195,7 +201,7 @@ template<typename RandomAccessIterator, typename Comparator>
* @param end Unused, only for compatibility.
* @param comp Unused, only for compatibility. */
unguarded_iterator(RandomAccessIterator begin,
- RandomAccessIterator end, Comparator& comp)
+ RandomAccessIterator end, Comparator& comp)
: current(begin), comp(comp)
{ }
@@ -210,7 +216,7 @@ template<typename RandomAccessIterator, typename Comparator>
/** @brief Dereference operator.
* @return Referenced element. */
- typename std::iterator_traits<RandomAccessIterator>::value_type
+ typename std::iterator_traits<RandomAccessIterator>::value_type&
operator*()
{ return *current; }
@@ -256,159 +262,41 @@ template<typename RandomAccessIterator, typename Comparator>
return !(bi1.comp)(*bi2, *bi1);
}
-/** Prepare a set of sequences to be merged without a (end) guard
- * @param seqs_begin
- * @param seqs_end
- * @param comp
- * @param min_sequence
- * @param stable
- * @pre (seqs_end - seqs_begin > 0) */
-template<typename RandomAccessIteratorIterator, typename Comparator>
- typename std::iterator_traits<
- typename std::iterator_traits<RandomAccessIteratorIterator>::value_type
- ::first_type>::difference_type
- prepare_unguarded(RandomAccessIteratorIterator seqs_begin,
- RandomAccessIteratorIterator seqs_end, Comparator comp,
- int& min_sequence, bool stable)
- {
- _GLIBCXX_CALL(seqs_end - seqs_begin)
-
- typedef typename std::iterator_traits<RandomAccessIteratorIterator>
- ::value_type::first_type
- RandomAccessIterator1;
- typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
- value_type;
- typedef typename std::iterator_traits<RandomAccessIterator1>
- ::difference_type
- difference_type;
-
- if ((*seqs_begin).first == (*seqs_begin).second)
- {
- // Empty sequence found, it's the first one.
- min_sequence = 0;
- return -1;
- }
-
- // Last element in sequence.
- value_type min = *((*seqs_begin).second - 1);
- min_sequence = 0;
- for (RandomAccessIteratorIterator s = seqs_begin + 1; s != seqs_end; ++s)
- {
- if ((*s).first == (*s).second)
- {
- // Empty sequence found.
- min_sequence = static_cast<int>(s - seqs_begin);
- return -1;
- }
-
- // Last element in sequence.
- const value_type& v = *((*s).second - 1);
- if (comp(v, min)) //strictly smaller
- {
- min = v;
- min_sequence = static_cast<int>(s - seqs_begin);
- }
- }
-
- difference_type overhang_size = 0;
-
- int s = 0;
- for (s = 0; s <= min_sequence; ++s)
- {
- RandomAccessIterator1 split;
- if (stable)
- split = std::upper_bound(seqs_begin[s].first, seqs_begin[s].second,
- min, comp);
- else
- split = std::lower_bound(seqs_begin[s].first, seqs_begin[s].second,
- min, comp);
-
- overhang_size += seqs_begin[s].second - split;
- }
-
- for (; s < (seqs_end - seqs_begin); ++s)
- {
- RandomAccessIterator1 split = std::lower_bound(
- seqs_begin[s].first, seqs_begin[s].second, min, comp);
- overhang_size += seqs_begin[s].second - split;
- }
-
- // So many elements will be left over afterwards.
- return overhang_size;
- }
-
-/** Prepare a set of sequences to be merged with a (end) guard (sentinel)
- * @param seqs_begin
- * @param seqs_end
- * @param comp */
-template<typename RandomAccessIteratorIterator, typename Comparator>
- typename std::iterator_traits<typename std::iterator_traits<
- RandomAccessIteratorIterator>::value_type::first_type>::difference_type
- prepare_unguarded_sentinel(RandomAccessIteratorIterator seqs_begin,
- RandomAccessIteratorIterator seqs_end,
- Comparator comp)
- {
- _GLIBCXX_CALL(seqs_end - seqs_begin)
-
- typedef typename std::iterator_traits<RandomAccessIteratorIterator>
- ::value_type::first_type
- RandomAccessIterator1;
- typedef typename std::iterator_traits<RandomAccessIterator1>
- ::value_type
- value_type;
- typedef typename std::iterator_traits<RandomAccessIterator1>
- ::difference_type
- difference_type;
-
- // Last element in sequence.
- value_type* max = NULL;
- for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
- {
- if ((*s).first == (*s).second)
- continue;
-
- // Last element in sequence.
- value_type& v = *((*s).second - 1);
-
- // Strictly greater.
- if (!max || comp(*max, v))
- max = &v;
- }
-
- difference_type overhang_size = 0;
- for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
- {
- RandomAccessIterator1 split =
- std::lower_bound((*s).first, (*s).second, *max, comp);
- overhang_size += (*s).second - split;
-
- // Set sentinel.
- *((*s).second) = *max;
- }
-
- // So many elements will be left over afterwards.
- return overhang_size;
- }
-
/** @brief Highly efficient 3-way merging procedure.
- * @param seqs_begin Begin iterator of iterator pair input sequence.
- * @param seqs_end End iterator of iterator pair input sequence.
- * @param target Begin iterator out output sequence.
- * @param comp Comparator.
- * @param length Maximum length to merge.
- * @param stable Unused, stable anyway.
- * @return End iterator of output sequence. */
+ *
+ * Merging is done with the algorithm implementation described by Peter
+ * Sanders. Basically, the idea is to minimize the number of necessary
+ * comparison after merging out an element. The implementation trick
+ * that makes this fast is that the order of the sequences is stored
+ * in the instruction pointer (translated into labels in C++).
+ *
+ * This works well for merging up to 4 sequences.
+ *
+ * Note that making the merging stable does <em>not</em> come at a
+ * performance hit.
+ *
+ * Whether the merging is done guarded or unguarded is selected by the
+ * used iterator class.
+ *
+ * @param seqs_begin Begin iterator of iterator pair input sequence.
+ * @param seqs_end End iterator of iterator pair input sequence.
+ * @param target Begin iterator out output sequence.
+ * @param comp Comparator.
+ * @param length Maximum length to merge.
+ *
+ * @return End iterator of output sequence.
+ */
template<template<typename RAI, typename C> class iterator,
typename RandomAccessIteratorIterator,
typename RandomAccessIterator3,
typename _DifferenceTp,
typename Comparator>
RandomAccessIterator3
- multiway_merge_3_variant(RandomAccessIteratorIterator seqs_begin,
- RandomAccessIteratorIterator seqs_end,
- RandomAccessIterator3 target,
- Comparator comp, _DifferenceTp length,
- bool stable)
+ multiway_merge_3_variant(
+ RandomAccessIteratorIterator seqs_begin,
+ RandomAccessIteratorIterator seqs_end,
+ RandomAccessIterator3 target,
+ Comparator comp, _DifferenceTp length)
{
_GLIBCXX_CALL(length);
@@ -423,6 +311,10 @@ template<template<typename RAI, typename C> class iterator,
if (length == 0)
return target;
+#if _GLIBCXX_ASSERTIONS
+ _DifferenceTp orig_length = length;
+#endif
+
iterator<RandomAccessIterator1, Comparator>
seq0(seqs_begin[0].first, seqs_begin[0].second, comp),
seq1(seqs_begin[1].first, seqs_begin[1].second, comp),
@@ -450,17 +342,16 @@ template<template<typename RAI, typename C> class iterator,
else
goto s210;
}
-
-#define _GLIBCXX_PARALLEL_MERGE_3_CASE(a,b,c,c0,c1)\
+#define _GLIBCXX_PARALLEL_MERGE_3_CASE(a,b,c,c0,c1) \
s ## a ## b ## c : \
*target = *seq ## a; \
- ++target; \
- --length; \
- ++seq ## a; \
- if (length == 0) goto finish; \
- if (seq ## a c0 seq ## b) goto s ## a ## b ## c; \
- if (seq ## a c1 seq ## c) goto s ## b ## a ## c; \
- goto s ## b ## c ## a;
+ ++target; \
+ --length; \
+ ++seq ## a; \
+ if (length == 0) goto finish; \
+ if (seq ## a c0 seq ## b) goto s ## a ## b ## c; \
+ if (seq ## a c1 seq ## c) goto s ## b ## a ## c; \
+ goto s ## b ## c ## a;
_GLIBCXX_PARALLEL_MERGE_3_CASE(0, 1, 2, <=, <=);
_GLIBCXX_PARALLEL_MERGE_3_CASE(1, 2, 0, <=, < );
@@ -474,6 +365,14 @@ template<template<typename RAI, typename C> class iterator,
finish:
;
+#if _GLIBCXX_ASSERTIONS
+ _GLIBCXX_PARALLEL_ASSERT(
+ ((RandomAccessIterator1)seq0 - seqs_begin[0].first) +
+ ((RandomAccessIterator1)seq1 - seqs_begin[1].first) +
+ ((RandomAccessIterator1)seq2 - seqs_begin[2].first)
+ == orig_length);
+#endif
+
seqs_begin[0].first = seq0;
seqs_begin[1].first = seq1;
seqs_begin[2].first = seq2;
@@ -481,95 +380,31 @@ template<template<typename RAI, typename C> class iterator,
return target;
}
-template<typename RandomAccessIteratorIterator,
- typename RandomAccessIterator3,
- typename _DifferenceTp,
- typename Comparator>
- RandomAccessIterator3
- multiway_merge_3_combined(RandomAccessIteratorIterator seqs_begin,
- RandomAccessIteratorIterator seqs_end,
- RandomAccessIterator3 target,
- Comparator comp,
- _DifferenceTp length, bool stable)
- {
- _GLIBCXX_CALL(length);
-
- typedef _DifferenceTp difference_type;
- typedef typename std::iterator_traits<RandomAccessIteratorIterator>
- ::value_type::first_type
- RandomAccessIterator1;
- typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
- value_type;
-
- int min_seq;
- RandomAccessIterator3 target_end;
-
- // Stable anyway.
- difference_type overhang =
- prepare_unguarded(seqs_begin, seqs_end, comp, min_seq, true);
-
- difference_type total_length = 0;
- for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
- total_length += _GLIBCXX_PARALLEL_LENGTH(*s);
-
- if (overhang != -1)
- {
- difference_type unguarded_length =
- std::min(length, total_length - overhang);
- target_end = multiway_merge_3_variant<unguarded_iterator>
- (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
- overhang = length - unguarded_length;
- }
- else
- {
- // Empty sequence found.
- overhang = length;
- target_end = target;
- }
-
-#if _GLIBCXX_ASSERTIONS
- _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
- _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
-#endif
-
- switch (min_seq)
- {
- case 0:
- // Iterators will be advanced accordingly.
- target_end = merge_advance(seqs_begin[1].first, seqs_begin[1].second,
- seqs_begin[2].first, seqs_begin[2].second,
- target_end, overhang, comp);
- break;
- case 1:
- target_end = merge_advance(seqs_begin[0].first, seqs_begin[0].second,
- seqs_begin[2].first, seqs_begin[2].second,
- target_end, overhang, comp);
- break;
- case 2:
- target_end = merge_advance(seqs_begin[0].first, seqs_begin[0].second,
- seqs_begin[1].first, seqs_begin[1].second,
- target_end, overhang, comp);
- break;
- default:
- _GLIBCXX_PARALLEL_ASSERT(false);
- }
-
-#if _GLIBCXX_ASSERTIONS
- _GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
- _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
-#endif
-
- return target_end;
- }
-
-/** @brief Highly efficient 4-way merging procedure.
- * @param seqs_begin Begin iterator of iterator pair input sequence.
- * @param seqs_end End iterator of iterator pair input sequence.
- * @param target Begin iterator out output sequence.
- * @param comp Comparator.
- * @param length Maximum length to merge.
- * @param stable Unused, stable anyway.
- * @return End iterator of output sequence. */
+/**
+ * @brief Highly efficient 4-way merging procedure.
+ *
+ * Merging is done with the algorithm implementation described by Peter
+ * Sanders. Basically, the idea is to minimize the number of necessary
+ * comparison after merging out an element. The implementation trick
+ * that makes this fast is that the order of the sequences is stored
+ * in the instruction pointer (translated into goto labels in C++).
+ *
+ * This works well for merging up to 4 sequences.
+ *
+ * Note that making the merging stable does <em>not</em> come at a
+ * performance hit.
+ *
+ * Whether the merging is done guarded or unguarded is selected by the
+ * used iterator class.
+ *
+ * @param seqs_begin Begin iterator of iterator pair input sequence.
+ * @param seqs_end End iterator of iterator pair input sequence.
+ * @param target Begin iterator out output sequence.
+ * @param comp Comparator.
+ * @param length Maximum length to merge.
+ *
+ * @return End iterator of output sequence.
+ */
template<template<typename RAI, typename C> class iterator,
typename RandomAccessIteratorIterator,
typename RandomAccessIterator3,
@@ -579,7 +414,7 @@ template<template<typename RAI, typename C> class iterator,
multiway_merge_4_variant(RandomAccessIteratorIterator seqs_begin,
RandomAccessIteratorIterator seqs_end,
RandomAccessIterator3 target,
- Comparator comp, _DifferenceTp length, bool stable)
+ Comparator comp, _DifferenceTp length)
{
_GLIBCXX_CALL(length);
typedef _DifferenceTp difference_type;
@@ -676,280 +511,20 @@ template<template<typename RAI, typename C> class iterator,
return target;
}
-template<typename RandomAccessIteratorIterator,
- typename RandomAccessIterator3,
- typename _DifferenceTp,
- typename Comparator>
- RandomAccessIterator3
- multiway_merge_4_combined(RandomAccessIteratorIterator seqs_begin,
- RandomAccessIteratorIterator seqs_end,
- RandomAccessIterator3 target,
- Comparator comp,
- _DifferenceTp length, bool stable)
- {
- _GLIBCXX_CALL(length);
- typedef _DifferenceTp difference_type;
-
- typedef typename std::iterator_traits<RandomAccessIteratorIterator>
- ::value_type::first_type
- RandomAccessIterator1;
- typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
- value_type;
-
- int min_seq;
- RandomAccessIterator3 target_end;
-
- // Stable anyway.
- difference_type overhang =
- prepare_unguarded(seqs_begin, seqs_end, comp, min_seq, true);
-
- difference_type total_length = 0;
- for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
- total_length += _GLIBCXX_PARALLEL_LENGTH(*s);
-
- if (overhang != -1)
- {
- difference_type unguarded_length =
- std::min(length, total_length - overhang);
- target_end = multiway_merge_4_variant<unguarded_iterator>
- (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
- overhang = length - unguarded_length;
- }
- else
- {
- // Empty sequence found.
- overhang = length;
- target_end = target;
- }
-
-#if _GLIBCXX_ASSERTIONS
- _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
- _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
-#endif
-
- std::vector<std::pair<RandomAccessIterator1, RandomAccessIterator1> >
- one_missing(seqs_begin, seqs_end);
- one_missing.erase(one_missing.begin() + min_seq); //remove
-
- target_end = multiway_merge_3_variant<guarded_iterator>(
- one_missing.begin(), one_missing.end(),
- target_end, comp, overhang, stable);
-
- // Insert back again.
- one_missing.insert(one_missing.begin() + min_seq, seqs_begin[min_seq]);
- // Write back modified iterators.
- copy(one_missing.begin(), one_missing.end(), seqs_begin);
-
-#if _GLIBCXX_ASSERTIONS
- _GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
- _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
-#endif
-
- return target_end;
- }
-
-/** @brief Basic multi-way merging procedure.
- *
- * The head elements are kept in a sorted array, new heads are
- * inserted linearly.
- * @param seqs_begin Begin iterator of iterator pair input sequence.
- * @param seqs_end End iterator of iterator pair input sequence.
- * @param target Begin iterator out output sequence.
- * @param comp Comparator.
- * @param length Maximum length to merge.
- * @param stable Stable merging incurs a performance penalty.
- * @return End iterator of output sequence.
- */
-template<typename RandomAccessIteratorIterator,
- typename RandomAccessIterator3,
- typename _DifferenceTp,
- typename Comparator>
- RandomAccessIterator3
- multiway_merge_bubble(RandomAccessIteratorIterator seqs_begin,
- RandomAccessIteratorIterator seqs_end,
- RandomAccessIterator3 target,
- Comparator comp, _DifferenceTp length, bool stable)
- {
- _GLIBCXX_CALL(length)
-
- typedef _DifferenceTp difference_type;
- typedef typename std::iterator_traits<RandomAccessIteratorIterator>
- ::value_type::first_type
- RandomAccessIterator1;
- typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
- value_type;
-
- int k = static_cast<int>(seqs_end - seqs_begin);
- int nrs; // Number of remaining sequences.
-
- // Avoid default constructor.
- value_type* fe = static_cast<value_type*>(
- ::operator new(sizeof(value_type) * k)); // Front elements.
- int* source = new int[k];
- difference_type total_length = 0;
-
- // Write entries into queue.
- nrs = 0;
- for (int pi = 0; pi < k; ++pi)
- {
- if (seqs_begin[pi].first != seqs_begin[pi].second)
- {
- ::new(&(fe[nrs])) value_type(*(seqs_begin[pi].first));
- source[nrs] = pi;
- ++nrs;
- total_length += _GLIBCXX_PARALLEL_LENGTH(seqs_begin[pi]);
- }
- }
-
- if (stable)
- {
- // Bubble sort fe and source by fe.
- for (int k = 0; k < nrs - 1; ++k)
- for (int pi = nrs - 1; pi > k; --pi)
- if (comp(fe[pi], fe[pi - 1]) ||
- (!comp(fe[pi - 1], fe[pi]) && source[pi] < source[pi - 1]))
- {
- std::swap(fe[pi - 1], fe[pi]);
- std::swap(source[pi - 1], source[pi]);
- }
- }
- else
- {
- for (int k = 0; k < nrs - 1; ++k)
- for (int pi = nrs - 1; pi > k; --pi)
- if (comp(fe[pi], fe[pi-1]))
- {
- std::swap(fe[pi-1], fe[pi]);
- std::swap(source[pi-1], source[pi]);
- }
- }
-
- // Iterate.
- if (stable)
- {
- int j;
- while (nrs > 0 && length > 0)
- {
- if (source[0] < source[1])
- {
- // fe[0] <= fe[1]
- while ((nrs == 1 || !comp(fe[1], fe[0])) && length > 0)
- {
- *target = fe[0];
- ++target;
- ++(seqs_begin[source[0]].first);
- --length;
- if (seqs_begin[source[0]].first
- == seqs_begin[source[0]].second)
- {
- // Move everything to the left.
- for (int s = 0; s < nrs - 1; ++s)
- {
- fe[s] = fe[s + 1];
- source[s] = source[s + 1];
- }
- fe[nrs - 1].~value_type(); //Destruct explicitly.
- --nrs;
- break;
- }
- else
- fe[0] = *(seqs_begin[source[0]].first);
- }
- }
- else
- {
- // fe[0] < fe[1]
- while ((nrs == 1 || comp(fe[0], fe[1])) && length > 0)
- {
- *target = fe[0];
- ++target;
- ++(seqs_begin[source[0]].first);
- --length;
- if (seqs_begin[source[0]].first
- == seqs_begin[source[0]].second)
- {
- for (int s = 0; s < nrs - 1; ++s)
- {
- fe[s] = fe[s + 1];
- source[s] = source[s + 1];
- }
- fe[nrs - 1].~value_type(); //Destruct explicitly.
- --nrs;
- break;
- }
- else
- fe[0] = *(seqs_begin[source[0]].first);
- }
- }
-
- // Sink down.
- j = 1;
- while ((j < nrs) && (comp(fe[j], fe[j - 1])
- || (!comp(fe[j - 1], fe[j])
- && (source[j] < source[j - 1]))))
- {
- std::swap(fe[j - 1], fe[j]);
- std::swap(source[j - 1], source[j]);
- ++j;
- }
- }
- }
- else
- {
- int j;
- while (nrs > 0 && length > 0)
- {
- // fe[0] <= fe[1]
- while (nrs == 1 || (!comp(fe[1], fe[0])) && length > 0)
- {
- *target = fe[0];
- ++target;
- ++seqs_begin[source[0]].first;
- --length;
- if (seqs_begin[source[0]].first
- == seqs_begin[source[0]].second)
- {
- for (int s = 0; s < (nrs - 1); ++s)
- {
- fe[s] = fe[s + 1];
- source[s] = source[s + 1];
- }
- fe[nrs - 1].~value_type(); //Destruct explicitly.
- --nrs;
- break;
- }
- else
- fe[0] = *(seqs_begin[source[0]].first);
- }
-
- // Sink down.
- j = 1;
- while ((j < nrs) && comp(fe[j], fe[j - 1]))
- {
- std::swap(fe[j - 1], fe[j]);
- std::swap(source[j - 1], source[j]);
- ++j;
- }
- }
- }
-
- ::operator delete(fe); //Destructors already called.
- delete[] source;
-
- return target;
- }
-
/** @brief Multi-way merging procedure for a high branching factor,
- * guarded case.
+ * guarded case.
*
- * The head elements are kept in a loser tree.
- * @param seqs_begin Begin iterator of iterator pair input sequence.
- * @param seqs_end End iterator of iterator pair input sequence.
- * @param target Begin iterator out output sequence.
- * @param comp Comparator.
- * @param length Maximum length to merge.
- * @param stable Stable merging incurs a performance penalty.
- * @return End iterator of output sequence.
+ * This merging variant uses a LoserTree class as selected by <tt>LT</tt>.
+ *
+ * Stability is selected through the used LoserTree class <tt>LT</tt>.
+ *
+ * @param seqs_begin Begin iterator of iterator pair input sequence.
+ * @param seqs_end End iterator of iterator pair input sequence.
+ * @param target Begin iterator out output sequence.
+ * @param comp Comparator.
+ * @param length Maximum length to merge.
+ *
+ * @return End iterator of output sequence.
*/
template<typename LT,
typename RandomAccessIteratorIterator,
@@ -961,7 +536,7 @@ template<typename LT,
RandomAccessIteratorIterator seqs_end,
RandomAccessIterator3 target,
Comparator comp,
- _DifferenceTp length, bool stable)
+ _DifferenceTp length)
{
_GLIBCXX_CALL(length)
@@ -994,82 +569,52 @@ template<typename LT,
for (int t = 0; t < k; ++t)
{
- if (stable)
- {
- if (seqs_begin[t].first == seqs_begin[t].second)
- lt.insert_start_stable(*arbitrary_element, t, true);
- else
- lt.insert_start_stable(*seqs_begin[t].first, t, false);
- }
+ if (seqs_begin[t].first == seqs_begin[t].second)
+ lt.insert_start(*arbitrary_element, t, true);
else
- {
- if (seqs_begin[t].first == seqs_begin[t].second)
- lt.insert_start(*arbitrary_element, t, true);
- else
- lt.insert_start(*seqs_begin[t].first, t, false);
- }
+ lt.insert_start(*seqs_begin[t].first, t, false);
}
- if (stable)
- lt.init_stable();
- else
- lt.init();
+ lt.init();
- total_length = std::min(total_length, length);
+ const difference_type const_total_length(std::min(total_length, length));
int source;
- if (stable)
+ for (difference_type i = 0; i < const_total_length; ++i)
{
- for (difference_type i = 0; i < total_length; ++i)
- {
- // Take out.
- source = lt.get_min_source();
+ //take out
+ source = lt.get_min_source();
- *(target++) = *(seqs_begin[source].first++);
+ *(target++) = *(seqs_begin[source].first++);
- // Feed.
- if (seqs_begin[source].first == seqs_begin[source].second)
- lt.delete_min_insert_stable(*arbitrary_element, true);
- else
- // Replace from same source.
- lt.delete_min_insert_stable(*seqs_begin[source].first, false);
-
- }
- }
- else
- {
- for (difference_type i = 0; i < total_length; ++i)
- {
- //take out
- source = lt.get_min_source();
-
- *(target++) = *(seqs_begin[source].first++);
-
- // Feed.
- if (seqs_begin[source].first == seqs_begin[source].second)
- lt.delete_min_insert(*arbitrary_element, true);
- else
- // Replace from same source.
- lt.delete_min_insert(*seqs_begin[source].first, false);
- }
+ // Feed.
+ if (seqs_begin[source].first == seqs_begin[source].second)
+ lt.delete_min_insert(*arbitrary_element, true);
+ else
+ // Replace from same source.
+ lt.delete_min_insert(*seqs_begin[source].first, false);
}
return target;
}
/** @brief Multi-way merging procedure for a high branching factor,
- * unguarded case.
+ * unguarded case.
*
- * The head elements are kept in a loser tree.
- * @param seqs_begin Begin iterator of iterator pair input sequence.
- * @param seqs_end End iterator of iterator pair input sequence.
- * @param target Begin iterator out output sequence.
- * @param comp Comparator.
- * @param length Maximum length to merge.
- * @param stable Stable merging incurs a performance penalty.
- * @return End iterator of output sequence.
- * @pre No input will run out of elements during the merge.
+ * Merging is done using the LoserTree class <tt>LT</tt>.
+ *
+ * Stability is selected by the used LoserTrees.
+ *
+ * @pre No input will run out of elements during the merge.
+ *
+ * @param seqs_begin Begin iterator of iterator pair input sequence.
+ * @param seqs_end End iterator of iterator pair input sequence.
+ * @param target Begin iterator out output sequence.
+ * @param comp Comparator.
+ * @param length Maximum length to merge.
+ *
+ * @return End iterator of output sequence.
*/
template<typename LT,
typename RandomAccessIteratorIterator,
@@ -1079,8 +624,8 @@ template<typename LT,
multiway_merge_loser_tree_unguarded(RandomAccessIteratorIterator seqs_begin,
RandomAccessIteratorIterator seqs_end,
RandomAccessIterator3 target,
- Comparator comp,
- _DifferenceTp length, bool stable)
+ int min_seq, Comparator comp,
+ _DifferenceTp length)
{
_GLIBCXX_CALL(length)
typedef _DifferenceTp difference_type;
@@ -1093,7 +638,11 @@ template<typename LT,
int k = seqs_end - seqs_begin;
- LT lt(k, comp);
+ // Determine the sentinel. The sentinel is largest/last element of the
+ // sequences with the smallest largest/last element.
+ value_type sentinel = *(seqs_begin[min_seq].second - 1);
+
+ LT lt(k, sentinel, comp);
difference_type total_length = 0;
@@ -1102,18 +651,12 @@ template<typename LT,
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(seqs_begin[t].first != seqs_begin[t].second);
#endif
- if (stable)
- lt.insert_start_stable(*seqs_begin[t].first, t, false);
- else
- lt.insert_start(*seqs_begin[t].first, t, false);
+ lt.insert_start(*seqs_begin[t].first, t, false);
total_length += _GLIBCXX_PARALLEL_LENGTH(seqs_begin[t]);
}
- if (stable)
- lt.init_stable();
- else
- lt.init();
+ lt.init();
// Do not go past end.
length = std::min(total_length, length);
@@ -1124,203 +667,311 @@ template<typename LT,
difference_type i = 0;
#endif
- if (stable)
+ RandomAccessIterator3 target_end = target + length;
+ while (target < target_end)
{
- RandomAccessIterator3 target_end = target + length;
- while (target < target_end)
- {
- // Take out.
- source = lt.get_min_source();
+ // Take out.
+ source = lt.get_min_source();
#if _GLIBCXX_ASSERTIONS
- _GLIBCXX_PARALLEL_ASSERT(i == 0
- || !comp(*(seqs_begin[source].first), *(target - 1)));
+ _GLIBCXX_PARALLEL_ASSERT(0 <= source && source < k);
+ _GLIBCXX_PARALLEL_ASSERT(i == 0
+ || !comp(*(seqs_begin[source].first), *(target - 1)));
#endif
- *(target++) = *(seqs_begin[source].first++);
+ // Feed.
+ *(target++) = *(seqs_begin[source].first++);
#if _GLIBCXX_ASSERTIONS
- _GLIBCXX_PARALLEL_ASSERT(
- (seqs_begin[source].first != seqs_begin[source].second)
- || (i == length - 1));
- ++i;
+ _GLIBCXX_PARALLEL_ASSERT(
+ (seqs_begin[source].first != seqs_begin[source].second)
+ || (i >= length - 1));
+ ++i;
#endif
- // Feed.
- // Replace from same source.
- lt.delete_min_insert_stable(*seqs_begin[source].first, false);
-
- }
- }
- else
- {
- RandomAccessIterator3 target_end = target + length;
- while (target < target_end)
- {
- // Take out.
- source = lt.get_min_source();
-
-#if _GLIBCXX_ASSERTIONS
- if (i > 0 && comp(*(seqs_begin[source].first), *(target - 1)))
- printf(" %i %i %i\n", length, i, source);
- _GLIBCXX_PARALLEL_ASSERT(i == 0
- || !comp(*(seqs_begin[source].first), *(target - 1)));
-#endif
-
- *(target++) = *(seqs_begin[source].first++);
-
-#if _GLIBCXX_ASSERTIONS
- if (!((seqs_begin[source].first != seqs_begin[source].second)
- || (i >= length - 1)))
- printf(" %i %i %i\n", length, i, source);
- _GLIBCXX_PARALLEL_ASSERT(
- (seqs_begin[source].first != seqs_begin[source].second)
- || (i >= length - 1));
- ++i;
-#endif
- // Feed.
- // Replace from same source.
- lt.delete_min_insert(*seqs_begin[source].first, false);
- }
+ // Replace from same source.
+ lt.delete_min_insert(*seqs_begin[source].first, false);
}
return target;
}
-template<typename RandomAccessIteratorIterator,
- typename RandomAccessIterator3,
- typename _DifferenceTp,
- typename Comparator>
+
+/** @brief Multi-way merging procedure for a high branching factor,
+ * requiring sentinels to exist.
+ * @param stable The value must the same as for the used LoserTrees.
+ * @param UnguardedLoserTree Loser Tree variant to use for the unguarded
+ * merging.
+ * @param GuardedLoserTree Loser Tree variant to use for the guarded
+ * merging.
+ *
+ * @param seqs_begin Begin iterator of iterator pair input sequence.
+ * @param seqs_end End iterator of iterator pair input sequence.
+ * @param target Begin iterator out output sequence.
+ * @param comp Comparator.
+ * @param length Maximum length to merge.
+ *
+ * @return End iterator of output sequence.
+ */
+template<
+ typename UnguardedLoserTree,
+ typename RandomAccessIteratorIterator,
+ typename RandomAccessIterator3,
+ typename _DifferenceTp,
+ typename Comparator>
RandomAccessIterator3
- multiway_merge_loser_tree_combined(RandomAccessIteratorIterator seqs_begin,
+ multiway_merge_loser_tree_sentinel(RandomAccessIteratorIterator seqs_begin,
RandomAccessIteratorIterator seqs_end,
RandomAccessIterator3 target,
Comparator comp,
- _DifferenceTp length, bool stable)
+ _DifferenceTp length)
{
_GLIBCXX_CALL(length)
typedef _DifferenceTp difference_type;
-
+ typedef std::iterator_traits<RandomAccessIteratorIterator> traits_type;
typedef typename std::iterator_traits<RandomAccessIteratorIterator>
::value_type::first_type
RandomAccessIterator1;
typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
value_type;
- int min_seq;
RandomAccessIterator3 target_end;
- difference_type overhang = prepare_unguarded(seqs_begin, seqs_end,
- comp, min_seq, stable);
difference_type total_length = 0;
for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
- total_length += _GLIBCXX_PARALLEL_LENGTH(*s);
-
- if (overhang != -1)
{
- difference_type unguarded_length =
- std::min(length, total_length - overhang);
- target_end = multiway_merge_loser_tree_unguarded
- <typename loser_tree_unguarded_traits<value_type, Comparator>::LT>
- (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
- overhang = length - unguarded_length;
- }
- else
- {
- // Empty sequence found.
- overhang = length;
- target_end = target;
- }
+ total_length += _GLIBCXX_PARALLEL_LENGTH(*s);
-#if _GLIBCXX_ASSERTIONS
- _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
- _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
-#endif
+ // Move the sequends end behind the sentinel spots. This has the
+ // effect that the sentinel appears to be within the sequence. Then,
+ // we can use the unguarded variant if we merge out as many
+ // non-sentinel elements as we have.
+ ++((*s).second);
+ }
- target_end = multiway_merge_loser_tree
- <typename loser_tree_traits<value_type, Comparator>::LT>
- (seqs_begin, seqs_end, target_end, comp, overhang, stable);
+ difference_type unguarded_length =
+ std::min(length, total_length);
+ target_end = multiway_merge_loser_tree_unguarded
+ <UnguardedLoserTree>
+ (seqs_begin, seqs_end, target, 0, comp, unguarded_length);
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
_GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
#endif
+ // Restore the sequence ends so the sentinels are not contained in the
+ // sequence any more (see comment in loop above).
+ for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
+ { --((*s).second); }
+
return target_end;
}
-template<typename RandomAccessIteratorIterator,
- typename RandomAccessIterator3,
- typename _DifferenceTp,
- typename Comparator>
- RandomAccessIterator3
- multiway_merge_loser_tree_sentinel(RandomAccessIteratorIterator seqs_begin,
- RandomAccessIteratorIterator seqs_end,
- RandomAccessIterator3 target,
- Comparator comp,
- _DifferenceTp length, bool stable)
+/**
+ * @brief Traits for determining whether the loser tree should
+ * use pointers or copies.
+ *
+ * The field "use_pointer" is used to determine whether to use pointers in
+ * the loser trees or whether to copy the values into the loser tree.
+ *
+ * The default behavior is to use pointers if the data type is 4 times as
+ * big as the pointer to it.
+ *
+ * Specialize for your data type to customize the behavior.
+ *
+ * Example:
+ *
+ * template<>
+ * struct loser_tree_traits<int>
+ * { static const bool use_pointer = false; };
+ *
+ * template<>
+ * struct loser_tree_traits<heavyweight_type>
+ * { static const bool use_pointer = true; };
+ *
+ * @param T type to give the loser tree traits for.
+ */
+template <typename T>
+struct loser_tree_traits
+{
+ /**
+ * @brief True iff to use pointers instead of values in loser trees.
+ *
+ * The default behavior is to use pointers if the data type is four
+ * times as big as the pointer to it.
+ */
+ static const bool use_pointer = (sizeof(T) > 4 * sizeof(T*));
+};
+
+/**
+ * @brief Switch for 3-way merging with sentinels turned off.
+ *
+ * Note that 3-way merging is always stable!
+ */
+template<
+ bool sentinels /*default == false*/,
+ typename RandomAccessIteratorIterator,
+ typename RandomAccessIterator3,
+ typename _DifferenceTp,
+ typename Comparator>
+struct multiway_merge_3_variant_sentinel_switch
+{
+ RandomAccessIterator3 operator()(
+ RandomAccessIteratorIterator seqs_begin,
+ RandomAccessIteratorIterator seqs_end,
+ RandomAccessIterator3 target,
+ Comparator comp, _DifferenceTp length)
{
- _GLIBCXX_CALL(length)
+ return multiway_merge_3_variant<guarded_iterator>(
+ seqs_begin, seqs_end, target, comp, length);
+ }
+};
- typedef _DifferenceTp difference_type;
- typedef std::iterator_traits<RandomAccessIteratorIterator> traits_type;
+/**
+ * @brief Switch for 3-way merging with sentinels turned on.
+ *
+ * Note that 3-way merging is always stable!
+ */
+template<
+ typename RandomAccessIteratorIterator,
+ typename RandomAccessIterator3,
+ typename _DifferenceTp,
+ typename Comparator>
+struct multiway_merge_3_variant_sentinel_switch
+ <true, RandomAccessIteratorIterator, RandomAccessIterator3,
+ _DifferenceTp, Comparator>
+{
+ RandomAccessIterator3 operator()(
+ RandomAccessIteratorIterator seqs_begin,
+ RandomAccessIteratorIterator seqs_end,
+ RandomAccessIterator3 target,
+ Comparator comp, _DifferenceTp length)
+ {
+ return multiway_merge_3_variant<unguarded_iterator>(
+ seqs_begin, seqs_end, target, comp, length);
+ }
+};
+
+/**
+ * @brief Switch for 4-way merging with sentinels turned off.
+ *
+ * Note that 4-way merging is always stable!
+ */
+template<
+ bool sentinels /*default == false*/,
+ typename RandomAccessIteratorIterator,
+ typename RandomAccessIterator3,
+ typename _DifferenceTp,
+ typename Comparator>
+struct multiway_merge_4_variant_sentinel_switch
+{
+ RandomAccessIterator3 operator()(
+ RandomAccessIteratorIterator seqs_begin,
+ RandomAccessIteratorIterator seqs_end,
+ RandomAccessIterator3 target,
+ Comparator comp, _DifferenceTp length)
+ {
+ return multiway_merge_4_variant<guarded_iterator>(
+ seqs_begin, seqs_end, target, comp, length);
+ }
+};
+
+/**
+ * @brief Switch for 4-way merging with sentinels turned on.
+ *
+ * Note that 4-way merging is always stable!
+ */
+template<
+ typename RandomAccessIteratorIterator,
+ typename RandomAccessIterator3,
+ typename _DifferenceTp,
+ typename Comparator>
+struct multiway_merge_4_variant_sentinel_switch
+ <true, RandomAccessIteratorIterator, RandomAccessIterator3,
+ _DifferenceTp, Comparator>
+{
+ RandomAccessIterator3 operator()(
+ RandomAccessIteratorIterator seqs_begin,
+ RandomAccessIteratorIterator seqs_end,
+ RandomAccessIterator3 target,
+ Comparator comp, _DifferenceTp length)
+ {
+ return multiway_merge_4_variant<unguarded_iterator>(
+ seqs_begin, seqs_end, target, comp, length);
+ }
+};
+
+/**
+ * @brief Switch for k-way merging with sentinels turned on.
+ */
+template<
+ bool sentinels,
+ bool stable,
+ typename RandomAccessIteratorIterator,
+ typename RandomAccessIterator3,
+ typename _DifferenceTp,
+ typename Comparator>
+struct multiway_merge_k_variant_sentinel_switch
+{
+ RandomAccessIterator3 operator()(
+ RandomAccessIteratorIterator seqs_begin,
+ RandomAccessIteratorIterator seqs_end,
+ RandomAccessIterator3 target,
+ Comparator comp, _DifferenceTp length)
+ {
typedef typename std::iterator_traits<RandomAccessIteratorIterator>
::value_type::first_type
RandomAccessIterator1;
typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
value_type;
- RandomAccessIterator3 target_end;
- difference_type overhang =
- prepare_unguarded_sentinel(seqs_begin, seqs_end, comp);
-
- difference_type total_length = 0;
- for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
- {
- total_length += _GLIBCXX_PARALLEL_LENGTH(*s);
-
- // Sentinel spot.
- ++((*s).second);
- }
-
- difference_type unguarded_length =
- std::min(length, total_length - overhang);
- target_end = multiway_merge_loser_tree_unguarded
- <typename loser_tree_unguarded_traits<value_type, Comparator>::LT>
- (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
- overhang = length - unguarded_length;
-
-#if _GLIBCXX_ASSERTIONS
- _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
- _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
-#endif
-
- // Copy rest stable.
- for (RandomAccessIteratorIterator s = seqs_begin;
- s != seqs_end && overhang > 0; ++s)
- {
- // Restore.
- --((*s).second);
- difference_type local_length =
- std::min<difference_type>(overhang, _GLIBCXX_PARALLEL_LENGTH(*s));
- target_end = std::copy((*s).first, (*s).first + local_length,
- target_end);
- (*s).first += local_length;
- overhang -= local_length;
- }
+ return multiway_merge_loser_tree_sentinel<
+ typename __gnu_cxx::__conditional_type<
+ loser_tree_traits<value_type>::use_pointer
+ , LoserTreePointerUnguarded<stable, value_type, Comparator>
+ , LoserTreeUnguarded<stable, value_type, Comparator>
+ >::__type>(seqs_begin, seqs_end, target, comp, length);
+ }
+};
-#if _GLIBCXX_ASSERTIONS
- _GLIBCXX_PARALLEL_ASSERT(overhang == 0);
- _GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
- _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
-#endif
+/**
+ * @brief Switch for k-way merging with sentinels turned off.
+ */
+template<
+ bool stable,
+ typename RandomAccessIteratorIterator,
+ typename RandomAccessIterator3,
+ typename _DifferenceTp,
+ typename Comparator>
+struct multiway_merge_k_variant_sentinel_switch
+ <false, stable, RandomAccessIteratorIterator, RandomAccessIterator3,
+ _DifferenceTp, Comparator>
+{
+ RandomAccessIterator3 operator()(
+ RandomAccessIteratorIterator seqs_begin,
+ RandomAccessIteratorIterator seqs_end,
+ RandomAccessIterator3 target,
+ Comparator comp, _DifferenceTp length)
+ {
+ typedef typename std::iterator_traits<RandomAccessIteratorIterator>
+ ::value_type::first_type
+ RandomAccessIterator1;
+ typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
+ value_type;
- return target_end;
+ return multiway_merge_loser_tree<
+ typename __gnu_cxx::__conditional_type<
+ loser_tree_traits<value_type>::use_pointer
+ , LoserTreePointer<stable, value_type, Comparator>
+ , LoserTree<stable, value_type, Comparator>
+ >::__type >(seqs_begin, seqs_end, target, comp, length);
}
+};
/** @brief Sequential multi-way merging switch.
*
- * The _GLIBCXX_PARALLEL_DECISION if based on the branching factor and
+ * The _GLIBCXX_PARALLEL_DECISION is based on the branching factor and
* runtime settings.
* @param seqs_begin Begin iterator of iterator pair input sequence.
* @param seqs_end End iterator of iterator pair input sequence.
@@ -1330,17 +981,18 @@ template<typename RandomAccessIteratorIterator,
* @param stable Stable merging incurs a performance penalty.
* @param sentinel The sequences have a sentinel element.
* @return End iterator of output sequence. */
-template<typename RandomAccessIteratorIterator,
- typename RandomAccessIterator3,
- typename _DifferenceTp,
- typename Comparator>
+template<
+ bool stable,
+ bool sentinels,
+ typename RandomAccessIteratorIterator,
+ typename RandomAccessIterator3,
+ typename _DifferenceTp,
+ typename Comparator>
RandomAccessIterator3
- multiway_merge(RandomAccessIteratorIterator seqs_begin,
+ sequential_multiway_merge(RandomAccessIteratorIterator seqs_begin,
RandomAccessIteratorIterator seqs_end,
RandomAccessIterator3 target,
- Comparator comp, _DifferenceTp length,
- bool stable, bool sentinel,
- sequential_tag)
+ Comparator comp, _DifferenceTp length)
{
_GLIBCXX_CALL(length)
@@ -1353,17 +1005,14 @@ template<typename RandomAccessIteratorIterator,
#if _GLIBCXX_ASSERTIONS
for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
- _GLIBCXX_PARALLEL_ASSERT(is_sorted((*s).first, (*s).second, comp));
+ {
+ _GLIBCXX_PARALLEL_ASSERT(is_sorted((*s).first, (*s).second, comp));
+ }
#endif
- RandomAccessIterator3 return_target = target;
+ RandomAccessIterator3 return_target = target;
int k = static_cast<int>(seqs_end - seqs_begin);
- _MultiwayMergeAlgorithm mwma = _Settings::get().multiway_merge_algorithm;
-
- if (!sentinel && mwma == LOSER_TREE_SENTINEL)
- mwma = LOSER_TREE_COMBINED;
-
switch (k)
{
case 0:
@@ -1382,113 +1031,30 @@ template<typename RandomAccessIteratorIterator,
target, length, comp);
break;
case 3:
- switch (mwma)
- {
- case LOSER_TREE_COMBINED:
- return_target = multiway_merge_3_combined(seqs_begin,
- seqs_end,
- target,
- comp, length,
- stable);
- break;
- case LOSER_TREE_SENTINEL:
- return_target =
- multiway_merge_3_variant<unguarded_iterator>(seqs_begin,
- seqs_end,
- target,
- comp, length,
- stable);
- break;
- default:
- return_target =
- multiway_merge_3_variant<guarded_iterator>(seqs_begin,
- seqs_end,
- target,
- comp, length,
- stable);
- break;
- }
+ return_target = multiway_merge_3_variant_sentinel_switch<
+ sentinels
+ , RandomAccessIteratorIterator
+ , RandomAccessIterator3
+ , _DifferenceTp
+ , Comparator>()(seqs_begin, seqs_end, target, comp, length);
break;
case 4:
- switch (mwma)
- {
- case LOSER_TREE_COMBINED:
- return_target = multiway_merge_4_combined(seqs_begin,
- seqs_end,
- target,
- comp, length, stable);
- break;
- case LOSER_TREE_SENTINEL:
- return_target =
- multiway_merge_4_variant<unguarded_iterator>(seqs_begin,
- seqs_end,
- target,
- comp, length,
- stable);
- break;
- default:
- return_target = multiway_merge_4_variant<guarded_iterator>(
- seqs_begin,
- seqs_end,
- target,
- comp, length, stable);
- break;
- }
+ return_target = multiway_merge_4_variant_sentinel_switch<
+ sentinels
+ , RandomAccessIteratorIterator
+ , RandomAccessIterator3
+ , _DifferenceTp
+ , Comparator>()(seqs_begin, seqs_end, target, comp, length);
break;
default:
- {
- switch (mwma)
- {
- case BUBBLE:
- return_target = multiway_merge_bubble(seqs_begin,
- seqs_end,
- target,
- comp, length, stable);
- break;
-#if _GLIBCXX_LOSER_TREE_EXPLICIT
- case LOSER_TREE_EXPLICIT:
- return_target = multiway_merge_loser_tree<
- LoserTreeExplicit<value_type, Comparator> >(seqs_begin,
- seqs_end,
- target,
- comp, length,
- stable);
- break;
-#endif
-#if _GLIBCXX_LOSER_TREE
- case LOSER_TREE:
- return_target = multiway_merge_loser_tree<
- LoserTree<value_type, Comparator> >(seqs_begin,
- seqs_end,
- target,
- comp, length,
- stable);
- break;
-#endif
-#if _GLIBCXX_LOSER_TREE_COMBINED
- case LOSER_TREE_COMBINED:
- return_target = multiway_merge_loser_tree_combined(seqs_begin,
- seqs_end,
- target,
- comp, length,
- stable);
- break;
-#endif
-#if _GLIBCXX_LOSER_TREE_SENTINEL
- case LOSER_TREE_SENTINEL:
- return_target = multiway_merge_loser_tree_sentinel(seqs_begin,
- seqs_end,
- target,
- comp, length,
- stable);
- break;
-#endif
- default:
- // multiway_merge algorithm not implemented.
- _GLIBCXX_PARALLEL_ASSERT(0);
- break;
- }
- }
+ return_target = multiway_merge_k_variant_sentinel_switch<
+ sentinels
+ , stable
+ , RandomAccessIteratorIterator
+ , RandomAccessIterator3
+ , _DifferenceTp
+ , Comparator>()(seqs_begin, seqs_end, target, comp, length);
+ break;
}
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target + length, comp));
@@ -1497,38 +1063,246 @@ template<typename RandomAccessIteratorIterator,
return return_target;
}
+/**
+ * @brief Stable sorting functor.
+ *
+ * Used to reduce code instanciation in multiway_merge_sampling_splitting.
+ */
+template<bool stable, class RandomAccessIterator, class StrictWeakOrdering>
+struct sampling_sorter
+{
+ void operator()(RandomAccessIterator first, RandomAccessIterator last,
+ StrictWeakOrdering comp)
+ { __gnu_sequential::stable_sort(first, last, comp); }
+};
+
+/**
+ * @brief Non-stable sorting functor.
+ *
+ * Used to reduce code instanciation in multiway_merge_sampling_splitting.
+ */
+template<class RandomAccessIterator, class StrictWeakOrdering>
+struct sampling_sorter<false, RandomAccessIterator, StrictWeakOrdering>
+{
+ void operator()(RandomAccessIterator first, RandomAccessIterator last,
+ StrictWeakOrdering comp)
+ { __gnu_sequential::sort(first, last, comp); }
+};
+
+/**
+ * @brief Sampling based splitting for parallel multiway-merge routine.
+ */
+template<
+ bool stable
+ , typename RandomAccessIteratorIterator
+ , typename Comparator
+ , typename difference_type>
+void multiway_merge_sampling_splitting(
+ RandomAccessIteratorIterator seqs_begin,
+ RandomAccessIteratorIterator seqs_end,
+ Comparator comp, difference_type length,
+ difference_type total_length,
+ std::vector<std::pair<difference_type, difference_type> > *pieces)
+{
+ typedef typename std::iterator_traits<RandomAccessIteratorIterator>
+ ::value_type::first_type
+ RandomAccessIterator1;
+ typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
+ value_type;
+
+ // k sequences.
+ int k = static_cast<int>(seqs_end - seqs_begin);
+
+ int num_threads = omp_get_num_threads();
+
+ difference_type num_samples =
+ __gnu_parallel::_Settings::get().merge_oversampling * num_threads;
+
+ value_type* samples = static_cast<value_type*>(
+ ::operator new(sizeof(value_type) * k * num_samples));
+ // Sample.
+ for (int s = 0; s < k; ++s)
+ for (difference_type i = 0; i < num_samples; ++i)
+ {
+ difference_type sample_index =
+ static_cast<difference_type>(
+ _GLIBCXX_PARALLEL_LENGTH(seqs_begin[s]) * (double(i + 1) /
+ (num_samples + 1)) * (double(length)
+ / total_length));
+ new(&(samples[s * num_samples + i])) value_type(
+ seqs_begin[s].first[sample_index]);
+ }
+
+ // Sort stable or non-stable, depending on value of template parameter
+ // "stable".
+ sampling_sorter<stable, value_type*, Comparator>()(
+ samples, samples + (num_samples * k), comp);
+
+ for (int slab = 0; slab < num_threads; ++slab)
+ // For each slab / processor.
+ for (int seq = 0; seq < k; ++seq)
+ {
+ // For each sequence.
+ if (slab > 0)
+ pieces[slab][seq].first =
+ std::upper_bound(
+ seqs_begin[seq].first,
+ seqs_begin[seq].second,
+ samples[num_samples * k * slab / num_threads],
+ comp)
+ - seqs_begin[seq].first;
+ else
+ {
+ // Absolute beginning.
+ pieces[slab][seq].first = 0;
+ }
+ if ((slab + 1) < num_threads)
+ pieces[slab][seq].second =
+ std::upper_bound(
+ seqs_begin[seq].first,
+ seqs_begin[seq].second,
+ samples[num_samples * k * (slab + 1) /
+ num_threads], comp)
+ - seqs_begin[seq].first;
+ else
+ pieces[slab][seq].second = _GLIBCXX_PARALLEL_LENGTH(seqs_begin[seq]);
+ }
+ ::operator delete(samples);
+}
+
+/**
+ * @brief Exact splitting for parallel multiway-merge routine.
+ */
+template<
+ bool stable
+ , typename RandomAccessIteratorIterator
+ , typename Comparator
+ , typename difference_type>
+void multiway_merge_exact_splitting(
+ RandomAccessIteratorIterator seqs_begin,
+ RandomAccessIteratorIterator seqs_end,
+ Comparator comp,
+ difference_type length,
+ difference_type total_length,
+ std::vector<std::pair<difference_type, difference_type> > *pieces)
+{
+ typedef typename std::iterator_traits<RandomAccessIteratorIterator>
+ ::value_type::first_type
+ RandomAccessIterator1;
+
+ const bool tight = (total_length == length);
+
+ // k sequences.
+ const int k = static_cast<int>(seqs_end - seqs_begin);
+
+ const int num_threads = omp_get_num_threads();
+
+ // (Settings::multiway_merge_splitting == __gnu_parallel::_Settings::EXACT).
+ std::vector<RandomAccessIterator1>* offsets =
+ new std::vector<RandomAccessIterator1>[num_threads];
+ std::vector<
+ std::pair<RandomAccessIterator1, RandomAccessIterator1>
+ > se(k);
+
+ copy(seqs_begin, seqs_end, se.begin());
+
+ difference_type* borders =
+ new difference_type[num_threads + 1];
+ equally_split(length, num_threads, borders);
+
+ for (int s = 0; s < (num_threads - 1); ++s)
+ {
+ offsets[s].resize(k);
+ multiseq_partition(
+ se.begin(), se.end(), borders[s + 1],
+ offsets[s].begin(), comp);
+
+ // Last one also needed and available.
+ if (!tight)
+ {
+ offsets[num_threads - 1].resize(k);
+ multiseq_partition(se.begin(), se.end(),
+ difference_type(length),
+ offsets[num_threads - 1].begin(), comp);
+ }
+ }
+
+
+ for (int slab = 0; slab < num_threads; ++slab)
+ {
+ // For each slab / processor.
+ for (int seq = 0; seq < k; ++seq)
+ {
+ // For each sequence.
+ if (slab == 0)
+ {
+ // Absolute beginning.
+ pieces[slab][seq].first = 0;
+ }
+ else
+ pieces[slab][seq].first =
+ pieces[slab - 1][seq].second;
+ if (!tight || slab < (num_threads - 1))
+ pieces[slab][seq].second =
+ offsets[slab][seq] - seqs_begin[seq].first;
+ else
+ {
+ // slab == num_threads - 1
+ pieces[slab][seq].second =
+ _GLIBCXX_PARALLEL_LENGTH(seqs_begin[seq]);
+ }
+ }
+ }
+ delete[] offsets;
+}
+
/** @brief Parallel multi-way merge routine.
*
- * The _GLIBCXX_PARALLEL_DECISION if based on the branching factor
- * and runtime settings.
- * @param seqs_begin Begin iterator of iterator pair input sequence.
- * @param seqs_end End iterator of iterator pair input sequence.
- * @param target Begin iterator out output sequence.
- * @param comp Comparator.
- * @param length Maximum length to merge.
- * @param stable Stable merging incurs a performance penalty.
- * @param sentinel Ignored.
- * @return End iterator of output sequence.
+ * The _GLIBCXX_PARALLEL_DECISION is based on the branching factor
+ * and runtime settings.
+ *
+ * Must not be called if the number of sequences is 1.
+ *
+ * @param Splitter functor to split input (either exact or sampling based)
+ *
+ * @param seqs_begin Begin iterator of iterator pair input sequence.
+ * @param seqs_end End iterator of iterator pair input sequence.
+ * @param target Begin iterator out output sequence.
+ * @param comp Comparator.
+ * @param length Maximum length to merge.
+ * @param stable Stable merging incurs a performance penalty.
+ * @param sentinel Ignored.
+ * @return End iterator of output sequence.
*/
-template<typename RandomAccessIteratorIterator,
- typename RandomAccessIterator3,
- typename _DifferenceTp,
- typename Comparator>
+template<
+ bool stable,
+ bool sentinels,
+ typename RandomAccessIteratorIterator,
+ typename RandomAccessIterator3,
+ typename _DifferenceTp,
+ typename Splitter,
+ typename Comparator
+ >
RandomAccessIterator3
parallel_multiway_merge(RandomAccessIteratorIterator seqs_begin,
RandomAccessIteratorIterator seqs_end,
- RandomAccessIterator3 target,
- Comparator comp,
- _DifferenceTp length, bool stable, bool sentinel)
+ RandomAccessIterator3 target,
+ Comparator comp,
+ Splitter splitter,
+ _DifferenceTp length)
{
+#if _GLIBCXX_ASSERTIONS
+ _GLIBCXX_PARALLEL_ASSERT(seqs_end - seqs_begin > 1);
+#endif
+
_GLIBCXX_CALL(length)
typedef _DifferenceTp difference_type;
typedef typename std::iterator_traits<RandomAccessIteratorIterator>
::value_type::first_type
RandomAccessIterator1;
- typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
- value_type;
+ typedef typename
+ std::iterator_traits<RandomAccessIterator1>::value_type value_type;
// k sequences.
int k = static_cast<int>(seqs_end - seqs_begin);
@@ -1543,13 +1317,10 @@ template<typename RandomAccessIteratorIterator,
if (total_length == 0 || k == 0)
return target;
- bool tight = (total_length == length);
-
std::vector<std::pair<difference_type, difference_type> >* pieces;
thread_index_t num_threads = static_cast<thread_index_t>(
- std::min<difference_type>(get_max_threads(), total_length));
- const _Settings& __s = _Settings::get();
+ std::min<difference_type>(get_max_threads(), total_length));
# pragma omp parallel num_threads (num_threads)
{
@@ -1562,126 +1333,12 @@ template<typename RandomAccessIteratorIterator,
for (int s = 0; s < num_threads; ++s)
pieces[s].resize(k);
- difference_type num_samples = __s.merge_oversampling
- * num_threads;
+ difference_type num_samples =
+ __gnu_parallel::_Settings::get().merge_oversampling *
+ num_threads;
- if (__s.multiway_merge_splitting == SAMPLING)
- {
- value_type* samples = static_cast<value_type*>(
- ::operator new(sizeof(value_type) * k * num_samples));
- // Sample.
- for (int s = 0; s < k; ++s)
- for (difference_type i = 0; i < num_samples; ++i)
- {
- difference_type sample_index =
- static_cast<difference_type>(
- _GLIBCXX_PARALLEL_LENGTH(seqs_begin[s])
- * (double(i + 1) / (num_samples + 1))
- * (double(length) / total_length));
- ::new(&(samples[s * num_samples + i]))
- value_type(seqs_begin[s].first[sample_index]);
- }
-
- if (stable)
- __gnu_sequential::stable_sort(samples, samples
- + (num_samples * k), comp);
- else
- __gnu_sequential::sort(samples, samples
- + (num_samples * k), comp);
-
- for (int slab = 0; slab < num_threads; ++slab)
- // For each slab / processor.
- for (int seq = 0; seq < k; ++seq)
- {
- // For each sequence.
- if (slab > 0)
- pieces[slab][seq].first =
- std::upper_bound(seqs_begin[seq].first,
- seqs_begin[seq].second,
- samples[num_samples * k
- * slab / num_threads],
- comp)
- - seqs_begin[seq].first;
- else
- {
- // Absolute beginning.
- pieces[slab][seq].first = 0;
- }
- if ((slab + 1) < num_threads)
- pieces[slab][seq].second =
- std::upper_bound(seqs_begin[seq].first,
- seqs_begin[seq].second,
- samples[num_samples * k
- * (slab + 1)
- / num_threads], comp)
- - seqs_begin[seq].first;
- else
- pieces[slab][seq].second
- = _GLIBCXX_PARALLEL_LENGTH(seqs_begin[seq]);
- }
- ::operator delete(samples);
- }
- else
- {
- // (_Settings::multiway_merge_splitting == _Settings::EXACT).
- std::vector<RandomAccessIterator1>* offsets =
- new std::vector<RandomAccessIterator1>[num_threads];
- std::vector<
- std::pair<RandomAccessIterator1, RandomAccessIterator1>
- > se(k);
-
- copy(seqs_begin, seqs_end, se.begin());
-
- difference_type* borders =
- new difference_type[num_threads + 1];
- equally_split(length, num_threads, borders);
-
- for (int s = 0; s < (num_threads - 1); ++s)
- {
- offsets[s].resize(k);
- multiseq_partition(
- se.begin(), se.end(), borders[s + 1],
- offsets[s].begin(), comp);
-
- // Last one also needed and available.
- if (!tight)
- {
- offsets[num_threads - 1].resize(k);
- multiseq_partition(se.begin(), se.end(),
- difference_type(length),
- offsets[num_threads - 1].begin(),
- comp);
- }
- }
-
-
- for (int slab = 0; slab < num_threads; ++slab)
- {
- // For each slab / processor.
- for (int seq = 0; seq < k; ++seq)
- {
- // For each sequence.
- if (slab == 0)
- {
- // Absolute beginning.
- pieces[slab][seq].first = 0;
- }
- else
- pieces[slab][seq].first =
- pieces[slab - 1][seq].second;
- if (!tight || slab < (num_threads - 1))
- pieces[slab][seq].second =
- offsets[slab][seq] - seqs_begin[seq].first;
- else
- {
- // slab == num_threads - 1
- pieces[slab][seq].second =
- _GLIBCXX_PARALLEL_LENGTH(seqs_begin[seq]);
- }
- }
- }
- delete[] offsets;
- }
+ splitter(seqs_begin, seqs_end, comp, length, total_length,
+ pieces);
} //single
thread_index_t iam = omp_get_thread_num();
@@ -1701,15 +1358,14 @@ template<typename RandomAccessIteratorIterator,
for (int s = 0; s < k; ++s)
{
chunks[s] = std::make_pair(
- seqs_begin[s].first + pieces[iam][s].first,
- seqs_begin[s].first + pieces[iam][s].second);
+ seqs_begin[s].first + pieces[iam][s].first,
+ seqs_begin[s].first + pieces[iam][s].second);
local_length += _GLIBCXX_PARALLEL_LENGTH(chunks[s]);
}
- multiway_merge(
+ sequential_multiway_merge<stable, sentinels>(
chunks, chunks + k, target + target_position, comp,
- std::min(local_length, length - target_position),
- stable, false, sequential_tag());
+ std::min(local_length, length - target_position));
delete[] chunks;
}
@@ -1727,7 +1383,7 @@ template<typename RandomAccessIteratorIterator,
(pieces[iam][1].second - pieces[iam][1].first),
comp);
}
- } //parallel
+ } // parallel
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target + length, comp));
@@ -1743,88 +1399,605 @@ template<typename RandomAccessIteratorIterator,
}
/**
- * @brief Multi-way merging front-end.
- * @param seqs_begin Begin iterator of iterator pair input sequence.
- * @param seqs_end End iterator of iterator pair input sequence.
- * @param target Begin iterator out output sequence.
- * @param comp Comparator.
- * @param length Maximum length to merge.
- * @param stable Stable merging incurs a performance penalty.
- * @return End iterator of output sequence.
+ * @brief Multiway Merge Frontend.
+ *
+ * Merge the sequences specified by seqs_begin and seqs_end into
+ * target. seqs_begin and seqs_end must point to a sequence of
+ * pairs. These pairs must contain an iterator to the beginning
+ * of a sequence in their first entry and an iterator the end of
+ * the same sequence in their second entry.
+ *
+ * Ties are broken arbitrarily. See stable_multiway_merge for a variant
+ * that breaks ties by sequence number but is slower.
+ *
+ * The first entries of the pairs (i.e. the begin iterators) will be moved
+ * forward.
+ *
+ * The output sequence has to provide enough space for all elements
+ * that are written to it.
+ *
+ * This function will merge the input sequences:
+ *
+ * - not stable
+ * - parallel, depending on the input size and Settings
+ * - using sampling for splitting
+ * - not using sentinels
+ *
+ * Example:
+ *
+ * <pre>
+ * int sequences[10][10];
+ * for (int i = 0; i < 10; ++i)
+ * for (int j = 0; i < 10; ++j)
+ * sequences[i][j] = j;
+ *
+ * int out[33];
+ * std::vector<std::pair<int*> > seqs;
+ * for (int i = 0; i < 10; ++i)
+ * { seqs.push(std::make_pair<int*>(sequences[i], sequences[i] + 10)) }
+ *
+ * multiway_merge(seqs.begin(), seqs.end(), target, std::less<int>(), 33);
+ * </pre>
+ *
+ * @see stable_multiway_merge
+ *
+ * @pre All input sequences must be sorted.
+ * @pre Target must provide enough space to merge out length elements or
+ * the number of elements in all sequences, whichever is smaller.
+ *
+ * @post [target, return value) contains merged elements from the
+ * input sequences.
+ * @post return value - target = min(length, number of elements in all
+ * sequences).
+ *
+ * @param RandomAccessIteratorPairIterator iterator over sequence
+ * of pairs of iterators
+ * @param RandomAccessIteratorOut iterator over target sequence
+ * @param _DifferenceTp difference type for the sequence
+ * @param Comparator strict weak ordering type to compare elements
+ * in sequences
+ *
+ * @param seqs_begin begin of sequence sequence
+ * @param seqs_end end of sequence sequence
+ * @param target target sequence to merge to.
+ * @param comp strict weak ordering to use for element comparison.
+ * @param length the number of elements to merge into target.
+ *
+ * @return end iterator of output sequence
*/
-template<typename RandomAccessIteratorPairIterator,
- typename RandomAccessIterator3,
- typename _DifferenceTp,
- typename Comparator>
- RandomAccessIterator3
- multiway_merge(RandomAccessIteratorPairIterator seqs_begin,
- RandomAccessIteratorPairIterator seqs_end,
- RandomAccessIterator3 target, Comparator comp,
- _DifferenceTp length, bool stable)
- {
+template<
+ typename RandomAccessIteratorPairIterator
+ , typename RandomAccessIteratorOut
+ , typename _DifferenceTp
+ , typename Comparator>
+RandomAccessIteratorOut
+multiway_merge(RandomAccessIteratorPairIterator seqs_begin
+ , RandomAccessIteratorPairIterator seqs_end
+ , RandomAccessIteratorOut target
+ , Comparator comp, _DifferenceTp length)
+{
+ typedef _DifferenceTp difference_type;
+ _GLIBCXX_CALL(seqs_end - seqs_begin)
+
+ // catch special case: no sequences
+ if (seqs_begin == seqs_end)
+ return target;
+
+ // Execute merge; maybe parallel, depending on the number of merged
+ // elements and the number of sequences and global thresholds in
+ // Settings.
+ RandomAccessIteratorOut target_end;
+ if ((seqs_end - seqs_begin > 1) &&
+ _GLIBCXX_PARALLEL_CONDITION(
+ ((seqs_end - seqs_begin) >=
+ __gnu_parallel::_Settings::get().multiway_merge_minimal_k)
+ && ((sequence_index_t)length >=
+ __gnu_parallel::_Settings::get().multiway_merge_minimal_n)))
+ target_end = parallel_multiway_merge
+ </* stable = */ false, /* sentinels = */ false>
+ (seqs_begin, seqs_end, target, comp,
+ multiway_merge_sampling_splitting</* stable = */ false,
+ RandomAccessIteratorPairIterator, Comparator, _DifferenceTp>,
+ static_cast<difference_type>(length));
+ else
+ target_end = sequential_multiway_merge
+ </* stable = */false, /* sentinels = */ false>(
+ seqs_begin, seqs_end,
+ target, comp, length);
+
+ return target_end;
+}
+
+template<
+ typename RandomAccessIteratorPairIterator
+ , typename RandomAccessIteratorOut
+ , typename _DifferenceTp
+ , typename Comparator>
+RandomAccessIteratorOut
+multiway_merge(RandomAccessIteratorPairIterator seqs_begin
+ , RandomAccessIteratorPairIterator seqs_end
+ , RandomAccessIteratorOut target
+ , Comparator comp, _DifferenceTp length
+ , __gnu_parallel::sequential_tag)
+{
+ typedef _DifferenceTp difference_type;
+ _GLIBCXX_CALL(seqs_end - seqs_begin)
+
+ // catch special case: no sequences
+ if (seqs_begin == seqs_end)
+ return target;
+
+ // Execute multiway merge *sequentially*.
+ return sequential_multiway_merge
+ </* stable = */ false, /* sentinels = */ false>
+ (seqs_begin, seqs_end, target, comp, length);
+}
+
+template<
+ typename RandomAccessIteratorPairIterator
+ , typename RandomAccessIteratorOut
+ , typename _DifferenceTp
+ , typename Comparator>
+RandomAccessIteratorOut
+multiway_merge(RandomAccessIteratorPairIterator seqs_begin
+ , RandomAccessIteratorPairIterator seqs_end
+ , RandomAccessIteratorOut target
+ , Comparator comp, _DifferenceTp length
+ , __gnu_parallel::exact_tag)
+{
typedef _DifferenceTp difference_type;
_GLIBCXX_CALL(seqs_end - seqs_begin)
+ // catch special case: no sequences
if (seqs_begin == seqs_end)
return target;
- const _Settings& __s = _Settings::get();
-
- RandomAccessIterator3 target_end;
- if (_GLIBCXX_PARALLEL_CONDITION(
- ((seqs_end - seqs_begin) >= __s.multiway_merge_minimal_k)
- && ((sequence_index_t)length >= __s.multiway_merge_minimal_n)))
- target_end = parallel_multiway_merge(seqs_begin, seqs_end,
- target, comp,
- static_cast<difference_type>(length),
- stable, false);
+ // Execute merge; maybe parallel, depending on the number of merged
+ // elements and the number of sequences and global thresholds in
+ // Settings.
+ RandomAccessIteratorOut target_end;
+ if ((seqs_end - seqs_begin > 1) &&
+ _GLIBCXX_PARALLEL_CONDITION(
+ ((seqs_end - seqs_begin) >=
+ __gnu_parallel::_Settings::get().multiway_merge_minimal_k)
+ && ((sequence_index_t)length >=
+ __gnu_parallel::_Settings::get().multiway_merge_minimal_n)))
+ target_end = parallel_multiway_merge
+ </* stable = */ false, /* sentinels = */ false>(
+ seqs_begin, seqs_end,
+ target, comp,
+ multiway_merge_exact_splitting</* stable = */ false,
+ RandomAccessIteratorPairIterator, Comparator, _DifferenceTp>,
+ static_cast<difference_type>(length));
else
- target_end = multiway_merge(seqs_begin, seqs_end, target, comp, length,
- stable, false, sequential_tag());
+ target_end = sequential_multiway_merge
+ </* stable = */ false, /* sentinels = */ false>(
+ seqs_begin, seqs_end,
+ target, comp, length);
return target_end;
- }
+}
-/** @brief Multi-way merging front-end.
- * @param seqs_begin Begin iterator of iterator pair input sequence.
- * @param seqs_end End iterator of iterator pair input sequence.
- * @param target Begin iterator out output sequence.
- * @param comp Comparator.
- * @param length Maximum length to merge.
- * @param stable Stable merging incurs a performance penalty.
- * @return End iterator of output sequence.
- * @pre For each @c i, @c seqs_begin[i].second must be the end
- * marker of the sequence, but also reference the one more sentinel
- * element. */
-template<typename RandomAccessIteratorPairIterator,
- typename RandomAccessIterator3,
- typename _DifferenceTp,
- typename Comparator>
- RandomAccessIterator3
- multiway_merge_sentinel(RandomAccessIteratorPairIterator seqs_begin,
- RandomAccessIteratorPairIterator seqs_end,
- RandomAccessIterator3 target,
- Comparator comp,
- _DifferenceTp length,
- bool stable)
- {
+template<
+ typename RandomAccessIteratorPairIterator
+ , typename RandomAccessIteratorOut
+ , typename _DifferenceTp
+ , typename Comparator>
+RandomAccessIteratorOut
+stable_multiway_merge(RandomAccessIteratorPairIterator seqs_begin
+ , RandomAccessIteratorPairIterator seqs_end
+ , RandomAccessIteratorOut target
+ , Comparator comp, _DifferenceTp length)
+{
typedef _DifferenceTp difference_type;
+ _GLIBCXX_CALL(seqs_end - seqs_begin)
+ // catch special case: no sequences
if (seqs_begin == seqs_end)
return target;
+ // Execute merge; maybe parallel, depending on the number of merged
+ // elements and the number of sequences and global thresholds in
+ // Settings.
+ RandomAccessIteratorOut target_end;
+ if ((seqs_end - seqs_begin > 1) &&
+ _GLIBCXX_PARALLEL_CONDITION(
+ ((seqs_end - seqs_begin) >=
+ __gnu_parallel::_Settings::get().multiway_merge_minimal_k)
+ && ((sequence_index_t)length >=
+ __gnu_parallel::_Settings::get().multiway_merge_minimal_n)))
+ target_end = parallel_multiway_merge
+ </* stable = */ true, /* sentinels = */ false>(
+ seqs_begin, seqs_end,
+ target, comp,
+ multiway_merge_sampling_splitting</* stable = */ true,
+ RandomAccessIteratorPairIterator, Comparator, _DifferenceTp>,
+ static_cast<difference_type>(length));
+ else
+ target_end = sequential_multiway_merge
+ </* stable = */ true, /* sentinels = */ false>(
+ seqs_begin, seqs_end,
+ target, comp, length);
+
+ return target_end;
+}
+
+template<
+ typename RandomAccessIteratorPairIterator
+ , typename RandomAccessIteratorOut
+ , typename _DifferenceTp
+ , typename Comparator>
+RandomAccessIteratorOut
+stable_multiway_merge(RandomAccessIteratorPairIterator seqs_begin
+ , RandomAccessIteratorPairIterator seqs_end
+ , RandomAccessIteratorOut target
+ , Comparator comp, _DifferenceTp length
+ , __gnu_parallel::sequential_tag)
+{
+ typedef _DifferenceTp difference_type;
+ _GLIBCXX_CALL(seqs_end - seqs_begin)
+
+ // catch special case: no sequences
+ if (seqs_begin == seqs_end)
+ { return target; }
+
+ // Execute multiway merge *sequentially*.
+ return sequential_multiway_merge
+ </* stable = */ true, /* sentinels = */ false>
+ (seqs_begin, seqs_end, target, comp, length);
+}
+
+template<
+ typename RandomAccessIteratorPairIterator
+ , typename RandomAccessIteratorOut
+ , typename _DifferenceTp
+ , typename Comparator>
+RandomAccessIteratorOut
+stable_multiway_merge(RandomAccessIteratorPairIterator seqs_begin
+ , RandomAccessIteratorPairIterator seqs_end
+ , RandomAccessIteratorOut target
+ , Comparator comp, _DifferenceTp length
+ , __gnu_parallel::exact_tag)
+{
+ typedef _DifferenceTp difference_type;
_GLIBCXX_CALL(seqs_end - seqs_begin)
- const _Settings& __s = _Settings::get();
- const bool cond1 = seqs_end - seqs_begin >= __s.multiway_merge_minimal_k;
- const bool cond2 = sequence_index_t(length) >= __s.multiway_merge_minimal_n;
- if (_GLIBCXX_PARALLEL_CONDITION(cond1 && cond2))
- return parallel_multiway_merge(seqs_begin, seqs_end, target, comp,
- length, stable, true);
+ // catch special case: no sequences
+ if (seqs_begin == seqs_end)
+ { return target; }
+
+ // Execute merge; maybe parallel, depending on the number of merged
+ // elements and the number of sequences and global thresholds in
+ // Settings.
+ RandomAccessIteratorOut target_end;
+ if ((seqs_end - seqs_begin > 1) &&
+ _GLIBCXX_PARALLEL_CONDITION(
+ ((seqs_end - seqs_begin) >=
+ __gnu_parallel::_Settings::get().multiway_merge_minimal_k)
+ && ((sequence_index_t)length >=
+ __gnu_parallel::_Settings::get().multiway_merge_minimal_n)))
+ target_end = parallel_multiway_merge
+ </* stable = */ true, /* sentinels = */ false>(
+ seqs_begin, seqs_end,
+ target, comp,
+ multiway_merge_exact_splitting
+ </* stable = */ true, RandomAccessIteratorPairIterator,
+ Comparator, _DifferenceTp>,
+ static_cast<difference_type>(length));
else
- return multiway_merge(seqs_begin, seqs_end, target, comp, length, stable,
- true, sequential_tag());
- }
+ target_end = sequential_multiway_merge</* stable = */ true,
+ /* sentinels = */ false>(
+ seqs_begin, seqs_end,
+ target, comp, length);
+
+ return target_end;
+}
+
+/**
+ * @brief Multiway Merge Frontend.
+ *
+ * Merge the sequences specified by seqs_begin and seqs_end into
+ * target. seqs_begin and seqs_end must point to a sequence of
+ * pairs. These pairs must contain an iterator to the beginning
+ * of a sequence in their first entry and an iterator the end of
+ * the same sequence in their second entry.
+ *
+ * Ties are broken arbitrarily. See stable_multiway_merge for a variant
+ * that breaks ties by sequence number but is slower.
+ *
+ * The first entries of the pairs (i.e. the begin iterators) will be moved
+ * forward.
+ *
+ * The output sequence has to provide enough space for all elements
+ * that are written to it.
+ *
+ * This function will merge the input sequences:
+ *
+ * - not stable
+ * - parallel, depending on the input size and Settings
+ * - using sampling for splitting
+ * - using sentinels
+ *
+ * You have to take care that the element the end iterator points to is
+ * readable and contains a value that is greater than any other non-sentinel
+ * value in all sequences.
+ *
+ * Example:
+ *
+ * <pre>
+ * int sequences[10][11];
+ * for (int i = 0; i < 10; ++i)
+ * for (int j = 0; i < 11; ++j)
+ * sequences[i][j] = j; // last one is sentinel!
+ *
+ * int out[33];
+ * std::vector<std::pair<int*> > seqs;
+ * for (int i = 0; i < 10; ++i)
+ * { seqs.push(std::make_pair<int*>(sequences[i], sequences[i] + 10)) }
+ *
+ * multiway_merge(seqs.begin(), seqs.end(), target, std::less<int>(), 33);
+ * </pre>
+ *
+ * @pre All input sequences must be sorted.
+ * @pre Target must provide enough space to merge out length elements or
+ * the number of elements in all sequences, whichever is smaller.
+ * @pre For each @c i, @c seqs_begin[i].second must be the end
+ * marker of the sequence, but also reference the one more sentinel
+ * element.
+ *
+ * @post [target, return value) contains merged elements from the
+ * input sequences.
+ * @post return value - target = min(length, number of elements in all
+ * sequences).
+ *
+ * @see stable_multiway_merge_sentinels
+ *
+ * @param RandomAccessIteratorPairIterator iterator over sequence
+ * of pairs of iterators
+ * @param RandomAccessIteratorOut iterator over target sequence
+ * @param _DifferenceTp difference type for the sequence
+ * @param Comparator strict weak ordering type to compare elements
+ * in sequences
+ *
+ * @param seqs_begin begin of sequence sequence
+ * @param seqs_end end of sequence sequence
+ * @param target target sequence to merge to.
+ * @param comp strict weak ordering to use for element comparison.
+ * @param length the number of elements to merge into target.
+ *
+ * @return end iterator of output sequence
+ */
+template<
+ typename RandomAccessIteratorPairIterator
+ , typename RandomAccessIteratorOut
+ , typename _DifferenceTp
+ , typename Comparator>
+RandomAccessIteratorOut
+multiway_merge_sentinels(RandomAccessIteratorPairIterator seqs_begin
+ , RandomAccessIteratorPairIterator seqs_end
+ , RandomAccessIteratorOut target
+ , Comparator comp, _DifferenceTp length)
+{
+ typedef _DifferenceTp difference_type;
+ _GLIBCXX_CALL(seqs_end - seqs_begin)
+
+ // catch special case: no sequences
+ if (seqs_begin == seqs_end)
+ { return target; }
+
+ // Execute merge; maybe parallel, depending on the number of merged
+ // elements and the number of sequences and global thresholds in
+ // Settings.
+ RandomAccessIteratorOut target_end;
+ if ((seqs_end - seqs_begin > 1) &&
+ _GLIBCXX_PARALLEL_CONDITION(
+ ((seqs_end - seqs_begin) >=
+ __gnu_parallel::_Settings::get().multiway_merge_minimal_k)
+ && ((sequence_index_t)length >=
+ __gnu_parallel::_Settings::get().multiway_merge_minimal_n)))
+ target_end = parallel_multiway_merge
+ </* stable = */ false, /* sentinels = */ true>
+ (seqs_begin, seqs_end, target, comp,
+ multiway_merge_sampling_splitting
+ </* stable = */ false, RandomAccessIteratorPairIterator,
+ Comparator, _DifferenceTp>,
+ static_cast<difference_type>(length));
+ else
+ target_end = sequential_multiway_merge
+ </* stable = */false, /* sentinels = */ true>(
+ seqs_begin, seqs_end,
+ target, comp, length);
+
+ return target_end;
}
+template<
+ typename RandomAccessIteratorPairIterator
+ , typename RandomAccessIteratorOut
+ , typename _DifferenceTp
+ , typename Comparator>
+RandomAccessIteratorOut
+multiway_merge_sentinels(RandomAccessIteratorPairIterator seqs_begin
+ , RandomAccessIteratorPairIterator seqs_end
+ , RandomAccessIteratorOut target
+ , Comparator comp, _DifferenceTp length
+ , __gnu_parallel::sequential_tag)
+{
+ typedef _DifferenceTp difference_type;
+ _GLIBCXX_CALL(seqs_end - seqs_begin)
+
+ // catch special case: no sequences
+ if (seqs_begin == seqs_end)
+ { return target; }
+
+ // Execute multiway merge *sequentially*.
+ return sequential_multiway_merge
+ </* stable = */ false, /* sentinels = */ true>
+ (seqs_begin, seqs_end, target, comp, length);
+}
+
+template<
+ typename RandomAccessIteratorPairIterator
+ , typename RandomAccessIteratorOut
+ , typename _DifferenceTp
+ , typename Comparator>
+RandomAccessIteratorOut
+multiway_merge_sentinels(RandomAccessIteratorPairIterator seqs_begin
+ , RandomAccessIteratorPairIterator seqs_end
+ , RandomAccessIteratorOut target
+ , Comparator comp, _DifferenceTp length
+ , __gnu_parallel::exact_tag)
+{
+ typedef _DifferenceTp difference_type;
+ _GLIBCXX_CALL(seqs_end - seqs_begin)
+
+ // catch special case: no sequences
+ if (seqs_begin == seqs_end)
+ { return target; }
+
+ // Execute merge; maybe parallel, depending on the number of merged
+ // elements and the number of sequences and global thresholds in
+ // Settings.
+ RandomAccessIteratorOut target_end;
+ if ((seqs_end - seqs_begin > 1) &&
+ _GLIBCXX_PARALLEL_CONDITION(
+ ((seqs_end - seqs_begin) >=
+ __gnu_parallel::_Settings::get().multiway_merge_minimal_k)
+ && ((sequence_index_t)length >=
+ __gnu_parallel::_Settings::get().multiway_merge_minimal_n)))
+ target_end = parallel_multiway_merge
+ </* stable = */ false, /* sentinels = */ true>(
+ seqs_begin, seqs_end,
+ target, comp,
+ multiway_merge_exact_splitting
+ </* stable = */ false, RandomAccessIteratorPairIterator,
+ Comparator, _DifferenceTp>,
+ static_cast<difference_type>(length));
+ else
+ target_end = sequential_multiway_merge
+ </* stable = */ false, /* sentinels = */ true>(
+ seqs_begin, seqs_end,
+ target, comp, length);
+
+ return target_end;
+}
+
+template<
+ typename RandomAccessIteratorPairIterator
+ , typename RandomAccessIteratorOut
+ , typename _DifferenceTp
+ , typename Comparator>
+RandomAccessIteratorOut
+stable_multiway_merge_sentinels(RandomAccessIteratorPairIterator seqs_begin
+ , RandomAccessIteratorPairIterator seqs_end
+ , RandomAccessIteratorOut target
+ , Comparator comp, _DifferenceTp length)
+{
+ typedef _DifferenceTp difference_type;
+ _GLIBCXX_CALL(seqs_end - seqs_begin)
+
+ // catch special case: no sequences
+ if (seqs_begin == seqs_end)
+ { return target; }
+
+ // Execute merge; maybe parallel, depending on the number of merged
+ // elements and the number of sequences and global thresholds in
+ // Settings.
+ RandomAccessIteratorOut target_end;
+ if ((seqs_end - seqs_begin > 1) &&
+ _GLIBCXX_PARALLEL_CONDITION(
+ ((seqs_end - seqs_begin) >=
+ __gnu_parallel::_Settings::get().multiway_merge_minimal_k)
+ && ((sequence_index_t)length >=
+ __gnu_parallel::_Settings::get().multiway_merge_minimal_n)))
+ target_end = parallel_multiway_merge
+ </* stable = */ true, /* sentinels = */ true>(
+ seqs_begin, seqs_end,
+ target, comp,
+ multiway_merge_sampling_splitting
+ </* stable = */ true, RandomAccessIteratorPairIterator,
+ Comparator, _DifferenceTp>,
+ static_cast<difference_type>(length));
+ else
+ target_end = sequential_multiway_merge
+ </* stable = */ true, /* sentinels = */ true>(
+ seqs_begin, seqs_end,
+ target, comp, length);
+
+ return target_end;
+}
+
+template<
+ typename RandomAccessIteratorPairIterator
+ , typename RandomAccessIteratorOut
+ , typename _DifferenceTp
+ , typename Comparator>
+RandomAccessIteratorOut
+stable_multiway_merge_sentinels(RandomAccessIteratorPairIterator seqs_begin
+ , RandomAccessIteratorPairIterator seqs_end
+ , RandomAccessIteratorOut target
+ , Comparator comp, _DifferenceTp length
+ , __gnu_parallel::sequential_tag)
+{
+ typedef _DifferenceTp difference_type;
+ _GLIBCXX_CALL(seqs_end - seqs_begin)
+
+ // catch special case: no sequences
+ if (seqs_begin == seqs_end)
+ { return target; }
+
+ // Execute multiway merge *sequentially*.
+ return sequential_multiway_merge
+ </* stable = */ true, /* sentinels = */ true>
+ (seqs_begin, seqs_end, target, comp, length);
+}
+
+template<
+ typename RandomAccessIteratorPairIterator
+ , typename RandomAccessIteratorOut
+ , typename _DifferenceTp
+ , typename Comparator>
+RandomAccessIteratorOut
+stable_multiway_merge_sentinels(RandomAccessIteratorPairIterator seqs_begin
+ , RandomAccessIteratorPairIterator seqs_end
+ , RandomAccessIteratorOut target
+ , Comparator comp, _DifferenceTp length
+ , __gnu_parallel::exact_tag)
+{
+ typedef _DifferenceTp difference_type;
+ _GLIBCXX_CALL(seqs_end - seqs_begin)
+
+ // catch special case: no sequences
+ if (seqs_begin == seqs_end)
+ { return target; }
+
+ // Execute merge; maybe parallel, depending on the number of merged
+ // elements and the number of sequences and global thresholds in
+ // Settings.
+ RandomAccessIteratorOut target_end;
+ if ((seqs_end - seqs_begin > 1) &&
+ _GLIBCXX_PARALLEL_CONDITION(
+ ((seqs_end - seqs_begin) >=
+ __gnu_parallel::_Settings::get().multiway_merge_minimal_k)
+ && ((sequence_index_t)length >=
+ __gnu_parallel::_Settings::get().multiway_merge_minimal_n)))
+ target_end = parallel_multiway_merge
+ </* stable = */ true, /* sentinels = */ true>(
+ seqs_begin, seqs_end,
+ target, comp,
+ multiway_merge_exact_splitting
+ </* stable = */ true, RandomAccessIteratorPairIterator,
+ Comparator, _DifferenceTp>,
+ static_cast<difference_type>(length));
+ else
+ target_end = sequential_multiway_merge
+ </* stable = */ true, /* sentinels = */ true>(
+ seqs_begin, seqs_end,
+ target, comp, length);
+
+ return target_end;
+}
+
+}; // namespace __gnu_parallel
+
#endif
diff --git a/libstdc++-v3/include/parallel/multiway_mergesort.h b/libstdc++-v3/include/parallel/multiway_mergesort.h
index c8ceb2f..3791a14 100644
--- a/libstdc++-v3/include/parallel/multiway_mergesort.h
+++ b/libstdc++-v3/include/parallel/multiway_mergesort.h
@@ -80,26 +80,9 @@ template<typename RandomAccessIterator>
/** @brief Start indices, per thread. */
difference_type* starts;
- /** @brief Temporary arrays for each thread.
- *
- * Indirection Allows using the temporary storage in different
- * ways, without code duplication.
- * @see _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST */
- value_type** temporaries;
-
-#if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST
/** @brief Storage in which to sort. */
- RandomAccessIterator* sorting_places;
+ value_type** temporary;
- /** @brief Storage into which to merge. */
- value_type** merging_places;
-#else
- /** @brief Storage in which to sort. */
- value_type** sorting_places;
-
- /** @brief Storage into which to merge. */
- RandomAccessIterator* merging_places;
-#endif
/** @brief Samples. */
value_type* samples;
@@ -108,9 +91,6 @@ template<typename RandomAccessIterator>
/** @brief Pieces of data to merge @c [thread][sequence] */
std::vector<Piece<difference_type> >* pieces;
-
- /** @brief Stable sorting desired. */
- bool stable;
};
/**
@@ -122,7 +102,7 @@ template<typename RandomAccessIterator>
template<typename RandomAccessIterator, typename _DifferenceTp>
void
determine_samples(PMWMSSortingData<RandomAccessIterator>* sd,
- _DifferenceTp& num_samples)
+ _DifferenceTp num_samples)
{
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::value_type value_type;
@@ -130,8 +110,6 @@ template<typename RandomAccessIterator, typename _DifferenceTp>
thread_index_t iam = omp_get_thread_num();
- num_samples = _Settings::get().sort_mwms_oversampling * sd->num_threads - 1;
-
difference_type* es = new difference_type[num_samples + 2];
equally_split(sd->starts[iam + 1] - sd->starts[iam],
@@ -144,11 +122,201 @@ template<typename RandomAccessIterator, typename _DifferenceTp>
delete[] es;
}
+/** @brief Split consistently. */
+template<bool exact, typename RandomAccessIterator,
+ typename Comparator, typename SortingPlacesIterator>
+ struct split_consistently
+ {
+ };
+
+/** @brief Split by exact splitting. */
+template<typename RandomAccessIterator, typename Comparator,
+ typename SortingPlacesIterator>
+ struct split_consistently
+ <true, RandomAccessIterator, Comparator, SortingPlacesIterator>
+ {
+ void operator()(
+ const thread_index_t iam,
+ PMWMSSortingData<RandomAccessIterator>* sd,
+ Comparator& comp,
+ const typename
+ std::iterator_traits<RandomAccessIterator>::difference_type
+ num_samples)
+ const
+ {
+# pragma omp barrier
+
+ std::vector<std::pair<SortingPlacesIterator, SortingPlacesIterator> >
+ seqs(sd->num_threads);
+ for (thread_index_t s = 0; s < sd->num_threads; s++)
+ seqs[s] = std::make_pair(sd->temporary[s],
+ sd->temporary[s]
+ + (sd->starts[s + 1] - sd->starts[s]));
+
+ std::vector<SortingPlacesIterator> offsets(sd->num_threads);
+
+ // if not last thread
+ if (iam < sd->num_threads - 1)
+ multiseq_partition(seqs.begin(), seqs.end(),
+ sd->starts[iam + 1], offsets.begin(), comp);
+
+ for (int seq = 0; seq < sd->num_threads; seq++)
+ {
+ // for each sequence
+ if (iam < (sd->num_threads - 1))
+ sd->pieces[iam][seq].end = offsets[seq] - seqs[seq].first;
+ else
+ // very end of this sequence
+ sd->pieces[iam][seq].end =
+ sd->starts[seq + 1] - sd->starts[seq];
+ }
+
+# pragma omp barrier
+
+ for (thread_index_t seq = 0; seq < sd->num_threads; seq++)
+ {
+ // For each sequence.
+ if (iam > 0)
+ sd->pieces[iam][seq].begin = sd->pieces[iam - 1][seq].end;
+ else
+ // Absolute beginning.
+ sd->pieces[iam][seq].begin = 0;
+ }
+ }
+ };
+
+/** @brief Split by sampling. */
+template<typename RandomAccessIterator, typename Comparator,
+ typename SortingPlacesIterator>
+ struct split_consistently<false, RandomAccessIterator, Comparator,
+ SortingPlacesIterator>
+ {
+ void operator()(
+ const thread_index_t iam,
+ PMWMSSortingData<RandomAccessIterator>* sd,
+ Comparator& comp,
+ const typename
+ std::iterator_traits<RandomAccessIterator>::difference_type
+ num_samples)
+ const
+ {
+ typedef std::iterator_traits<RandomAccessIterator> traits_type;
+ typedef typename traits_type::value_type value_type;
+ typedef typename traits_type::difference_type difference_type;
+
+ determine_samples(sd, num_samples);
+
+# pragma omp barrier
+
+# pragma omp single
+ __gnu_sequential::sort(sd->samples,
+ sd->samples + (num_samples * sd->num_threads),
+ comp);
+
+# pragma omp barrier
+
+ for (thread_index_t s = 0; s < sd->num_threads; ++s)
+ {
+ // For each sequence.
+ if (num_samples * iam > 0)
+ sd->pieces[iam][s].begin =
+ std::lower_bound(sd->temporary[s],
+ sd->temporary[s]
+ + (sd->starts[s + 1] - sd->starts[s]),
+ sd->samples[num_samples * iam],
+ comp)
+ - sd->temporary[s];
+ else
+ // Absolute beginning.
+ sd->pieces[iam][s].begin = 0;
+
+ if ((num_samples * (iam + 1)) < (num_samples * sd->num_threads))
+ sd->pieces[iam][s].end =
+ std::lower_bound(sd->temporary[s],
+ sd->temporary[s]
+ + (sd->starts[s + 1] - sd->starts[s]),
+ sd->samples[num_samples * (iam + 1)],
+ comp)
+ - sd->temporary[s];
+ else
+ // Absolute end.
+ sd->pieces[iam][s].end = sd->starts[s + 1] - sd->starts[s];
+ }
+ }
+ };
+
+template<bool stable, typename RandomAccessIterator, typename Comparator>
+ struct possibly_stable_sort
+ {
+ };
+
+template<typename RandomAccessIterator, typename Comparator>
+ struct possibly_stable_sort<true, RandomAccessIterator, Comparator>
+ {
+ void operator()(const RandomAccessIterator& begin,
+ const RandomAccessIterator& end, Comparator& comp) const
+ {
+ __gnu_sequential::stable_sort(begin, end, comp);
+ }
+ };
+
+template<typename RandomAccessIterator, typename Comparator>
+ struct possibly_stable_sort<false, RandomAccessIterator, Comparator>
+ {
+ void operator()(const RandomAccessIterator begin,
+ const RandomAccessIterator end, Comparator& comp) const
+ {
+ __gnu_sequential::sort(begin, end, comp);
+ }
+ };
+
+template<bool stable, typename SeqRandomAccessIterator,
+ typename RandomAccessIterator, typename Comparator,
+ typename DiffType>
+ struct possibly_stable_multiway_merge
+ {
+ };
+
+template<typename SeqRandomAccessIterator, typename RandomAccessIterator,
+ typename Comparator, typename DiffType>
+ struct possibly_stable_multiway_merge
+ <true, SeqRandomAccessIterator, RandomAccessIterator, Comparator,
+ DiffType>
+ {
+ void operator()(const SeqRandomAccessIterator& seqs_begin,
+ const SeqRandomAccessIterator& seqs_end,
+ const RandomAccessIterator& target,
+ Comparator& comp,
+ DiffType length_am) const
+ {
+ stable_multiway_merge(seqs_begin, seqs_end, target, comp,
+ length_am, sequential_tag());
+ }
+ };
+
+template<typename SeqRandomAccessIterator, typename RandomAccessIterator,
+ typename Comparator, typename DiffType>
+ struct possibly_stable_multiway_merge
+ <false, SeqRandomAccessIterator, RandomAccessIterator, Comparator,
+ DiffType>
+ {
+ void operator()(const SeqRandomAccessIterator& seqs_begin,
+ const SeqRandomAccessIterator& seqs_end,
+ const RandomAccessIterator& target,
+ Comparator& comp,
+ DiffType length_am) const
+ {
+ multiway_merge(seqs_begin, seqs_end, target, comp,
+ length_am, sequential_tag());
+ }
+ };
+
/** @brief PMWMS code executed by each thread.
* @param sd Pointer to algorithm data.
* @param comp Comparator.
*/
-template<typename RandomAccessIterator, typename Comparator>
+template<bool stable, bool exact, typename RandomAccessIterator,
+ typename Comparator>
void
parallel_sort_mwms_pu(PMWMSSortingData<RandomAccessIterator>* sd,
Comparator& comp)
@@ -162,165 +330,65 @@ template<typename RandomAccessIterator, typename Comparator>
// Length of this thread's chunk, before merging.
difference_type length_local = sd->starts[iam + 1] - sd->starts[iam];
-#if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST
- typedef RandomAccessIterator SortingPlacesIterator;
+ // Sort in temporary storage, leave space for sentinel.
- // Sort in input storage.
- sd->sorting_places[iam] = sd->source + sd->starts[iam];
-#else
typedef value_type* SortingPlacesIterator;
- // Sort in temporary storage, leave space for sentinel.
- sd->sorting_places[iam] = sd->temporaries[iam] =
+ sd->temporary[iam] =
static_cast<value_type*>(
::operator new(sizeof(value_type) * (length_local + 1)));
// Copy there.
std::uninitialized_copy(sd->source + sd->starts[iam],
sd->source + sd->starts[iam] + length_local,
- sd->sorting_places[iam]);
-#endif
-
- // Sort locally.
- if (sd->stable)
- __gnu_sequential::stable_sort(sd->sorting_places[iam],
- sd->sorting_places[iam] + length_local,
- comp);
- else
- __gnu_sequential::sort(sd->sorting_places[iam],
- sd->sorting_places[iam] + length_local,
- comp);
-
- // Invariant: locally sorted subsequence in sd->sorting_places[iam],
- // sd->sorting_places[iam] + length_local.
- const _Settings& __s = _Settings::get();
- if (__s.sort_splitting == SAMPLING)
- {
- difference_type num_samples;
- determine_samples(sd, num_samples);
-
-# pragma omp barrier
-
-# pragma omp single
- __gnu_sequential::sort(sd->samples,
- sd->samples + (num_samples * sd->num_threads),
- comp);
-
-# pragma omp barrier
-
- for (int s = 0; s < sd->num_threads; ++s)
- {
- // For each sequence.
- if (num_samples * iam > 0)
- sd->pieces[iam][s].begin =
- std::lower_bound(sd->sorting_places[s],
- sd->sorting_places[s]
- + (sd->starts[s + 1] - sd->starts[s]),
- sd->samples[num_samples * iam],
- comp)
- - sd->sorting_places[s];
- else
- // Absolute beginning.
- sd->pieces[iam][s].begin = 0;
-
- if ((num_samples * (iam + 1)) < (num_samples * sd->num_threads))
- sd->pieces[iam][s].end =
- std::lower_bound(sd->sorting_places[s],
- sd->sorting_places[s]
- + (sd->starts[s + 1] - sd->starts[s]),
- sd->samples[num_samples * (iam + 1)],
- comp)
- - sd->sorting_places[s];
- else
- // Absolute end.
- sd->pieces[iam][s].end = sd->starts[s + 1] - sd->starts[s];
- }
- }
- else if (__s.sort_splitting == EXACT)
- {
-# pragma omp barrier
+ sd->temporary[iam]);
- std::vector<std::pair<SortingPlacesIterator, SortingPlacesIterator> >
- seqs(sd->num_threads);
- for (int s = 0; s < sd->num_threads; ++s)
- seqs[s] = std::make_pair(sd->sorting_places[s],
- sd->sorting_places[s]
- + (sd->starts[s + 1] - sd->starts[s]));
+ possibly_stable_sort<stable, SortingPlacesIterator, Comparator>()
+ (sd->temporary[iam], sd->temporary[iam] + length_local, comp);
- std::vector<SortingPlacesIterator> offsets(sd->num_threads);
+ // Invariant: locally sorted subsequence in sd->temporary[iam],
+ // sd->temporary[iam] + length_local.
- // if not last thread
- if (iam < sd->num_threads - 1)
- multiseq_partition(seqs.begin(), seqs.end(),
- sd->starts[iam + 1], offsets.begin(), comp);
+ // No barrier here: Synchronization is done by the splitting routine.
- for (int seq = 0; seq < sd->num_threads; ++seq)
- {
- // for each sequence
- if (iam < (sd->num_threads - 1))
- sd->pieces[iam][seq].end = offsets[seq] - seqs[seq].first;
- else
- // very end of this sequence
- sd->pieces[iam][seq].end = (sd->starts[seq + 1]
- - sd->starts[seq]);
- }
-
-# pragma omp barrier
-
- for (int seq = 0; seq < sd->num_threads; ++seq)
- {
- // For each sequence.
- if (iam > 0)
- sd->pieces[iam][seq].begin = sd->pieces[iam - 1][seq].end;
- else
- // Absolute beginning.
- sd->pieces[iam][seq].begin = 0;
- }
- }
+ difference_type num_samples =
+ _Settings::get().sort_mwms_oversampling * sd->num_threads - 1;
+ split_consistently
+ <exact, RandomAccessIterator, Comparator, SortingPlacesIterator>()
+ (iam, sd, comp, num_samples);
// Offset from target begin, length after merging.
difference_type offset = 0, length_am = 0;
- for (int s = 0; s < sd->num_threads; ++s)
+ for (thread_index_t s = 0; s < sd->num_threads; s++)
{
length_am += sd->pieces[iam][s].end - sd->pieces[iam][s].begin;
offset += sd->pieces[iam][s].begin;
}
-#if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST
- // Merge to temporary storage, uninitialized creation not possible
- // since there is no multiway_merge calling the placement new
- // instead of the assignment operator.
- // XXX incorrect (de)construction
- sd->merging_places[iam] = sd->temporaries[iam] =
- static_cast<value_type*>(::operator new(sizeof(value_type)
- * length_am));
-#else
- // Merge directly to target.
- sd->merging_places[iam] = sd->source + offset;
-#endif
- std::vector<std::pair<SortingPlacesIterator, SortingPlacesIterator> >
- seqs(sd->num_threads);
+ typedef std::vector<
+ std::pair<SortingPlacesIterator, SortingPlacesIterator> >
+ seq_vector_type;
+ seq_vector_type seqs(sd->num_threads);
for (int s = 0; s < sd->num_threads; ++s)
{
seqs[s] =
- std::make_pair(sd->sorting_places[s] + sd->pieces[iam][s].begin,
- sd->sorting_places[s] + sd->pieces[iam][s].end);
+ std::make_pair(sd->temporary[s] + sd->pieces[iam][s].begin,
+ sd->temporary[s] + sd->pieces[iam][s].end);
}
- multiway_merge(seqs.begin(), seqs.end(), sd->merging_places[iam], comp,
- length_am, sd->stable, false, sequential_tag());
+ possibly_stable_multiway_merge<
+ stable,
+ typename seq_vector_type::iterator,
+ RandomAccessIterator,
+ Comparator, difference_type>()
+ (seqs.begin(), seqs.end(),
+ sd->source + offset, comp,
+ length_am);
# pragma omp barrier
-#if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST
- // Write back.
- std::copy(sd->merging_places[iam],
- sd->merging_places[iam] + length_am,
- sd->source + offset);
-#endif
-
- ::operator delete(sd->temporaries[iam]);
+ ::operator delete(sd->temporary[iam]);
}
/** @brief PMWMS main call.
@@ -329,21 +397,22 @@ template<typename RandomAccessIterator, typename Comparator>
* @param comp Comparator.
* @param n Length of sequence.
* @param num_threads Number of threads to use.
- * @param stable Stable sorting.
*/
-template<typename RandomAccessIterator, typename Comparator>
+template<bool stable, bool exact, typename RandomAccessIterator,
+ typename Comparator>
void
parallel_sort_mwms(RandomAccessIterator begin, RandomAccessIterator end,
- Comparator comp, typename
- std::iterator_traits<RandomAccessIterator>::
- difference_type n, int num_threads, bool stable)
+ Comparator comp,
+ thread_index_t num_threads)
{
- _GLIBCXX_CALL(n)
+ _GLIBCXX_CALL(end - begin)
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::value_type value_type;
typedef typename traits_type::difference_type difference_type;
+ difference_type n = end - begin;
+
if (n <= 1)
return;
@@ -354,7 +423,6 @@ template<typename RandomAccessIterator, typename Comparator>
// shared variables
PMWMSSortingData<RandomAccessIterator> sd;
difference_type* starts;
- const _Settings& __s = _Settings::get();
# pragma omp parallel num_threads(num_threads)
{
@@ -364,23 +432,16 @@ template<typename RandomAccessIterator, typename Comparator>
{
sd.num_threads = num_threads;
sd.source = begin;
- sd.temporaries = new value_type*[num_threads];
-
-#if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST
- sd.sorting_places = new RandomAccessIterator[num_threads];
- sd.merging_places = new value_type*[num_threads];
-#else
- sd.sorting_places = new value_type*[num_threads];
- sd.merging_places = new RandomAccessIterator[num_threads];
-#endif
- if (__s.sort_splitting == SAMPLING)
+ sd.temporary = new value_type*[num_threads];
+
+ if (!exact)
{
- unsigned int size =
- (__s.sort_mwms_oversampling * num_threads - 1)
+ difference_type size =
+ (_Settings::get().sort_mwms_oversampling * num_threads - 1)
* num_threads;
sd.samples = static_cast<value_type*>(
- ::operator new(size * sizeof(value_type)));
+ ::operator new(size * sizeof(value_type)));
}
else
sd.samples = NULL;
@@ -390,7 +451,6 @@ template<typename RandomAccessIterator, typename Comparator>
for (int s = 0; s < num_threads; ++s)
sd.pieces[s].resize(num_threads);
starts = sd.starts = new difference_type[num_threads + 1];
- sd.stable = stable;
difference_type chunk_length = n / num_threads;
difference_type split = n % num_threads;
@@ -401,18 +461,16 @@ template<typename RandomAccessIterator, typename Comparator>
pos += (i < split) ? (chunk_length + 1) : chunk_length;
}
starts[num_threads] = pos;
- }
+ } //single
// Now sort in parallel.
- parallel_sort_mwms_pu(&sd, comp);
+ parallel_sort_mwms_pu<stable, exact>(&sd, comp);
} //parallel
delete[] starts;
- delete[] sd.temporaries;
- delete[] sd.sorting_places;
- delete[] sd.merging_places;
+ delete[] sd.temporary;
- if (__s.sort_splitting == SAMPLING)
+ if (!exact)
::operator delete(sd.samples);
delete[] sd.offsets;
diff --git a/libstdc++-v3/include/parallel/sort.h b/libstdc++-v3/include/parallel/sort.h
index edf4eea..83aa2df 100644
--- a/libstdc++-v3/include/parallel/sort.h
+++ b/libstdc++-v3/include/parallel/sort.h
@@ -71,7 +71,7 @@ namespace __gnu_parallel
template<typename RandomAccessIterator, typename Comparator>
inline void
parallel_sort(RandomAccessIterator begin, RandomAccessIterator end,
- Comparator comp, bool stable)
+ Comparator comp, bool stable)
{
_GLIBCXX_CALL(end - begin)
typedef std::iterator_traits<RandomAccessIterator> traits_type;
@@ -79,25 +79,43 @@ namespace __gnu_parallel
typedef typename traits_type::difference_type difference_type;
if (begin != end)
- {
- difference_type n = end - begin;
+ {
+ difference_type n = end - begin;
- if (false) ;
+ if (false) ;
#if _GLIBCXX_MERGESORT
- else if (stable || _Settings::get().sort_algorithm == MWMS)
- parallel_sort_mwms(begin, end, comp, n, get_max_threads(), stable);
+ else if (stable)
+ {
+ if(_Settings::get().sort_splitting == EXACT)
+ parallel_sort_mwms<true, true>
+ (begin, end, comp, get_max_threads());
+ else
+ parallel_sort_mwms<true, false>
+ (begin, end, comp, get_max_threads());
+ }
+ else if (_Settings::get().sort_algorithm == MWMS)
+ {
+ if(_Settings::get().sort_splitting == EXACT)
+ parallel_sort_mwms<false, true>
+ (begin, end, comp, get_max_threads());
+ else
+ parallel_sort_mwms<false, false>
+ (begin, end, comp, get_max_threads());
+ }
#endif
#if _GLIBCXX_QUICKSORT
- else if (!stable && _Settings::get().sort_algorithm == QS)
- parallel_sort_qs(begin, end, comp, n, get_max_threads());
+ else if (!stable && _Settings::get().sort_algorithm == QS)
+ parallel_sort_qs(begin, end, comp, n, get_max_threads());
#endif
#if _GLIBCXX_BAL_QUICKSORT
- else if (!stable && _Settings::get().sort_algorithm == QS_BALANCED)
- parallel_sort_qsb(begin, end, comp, n, get_max_threads());
+ else if (!stable && _Settings::get().sort_algorithm == QS_BALANCED)
+ parallel_sort_qsb(begin, end, comp, n, get_max_threads());
#endif
- else
- __gnu_sequential::sort(begin, end, comp);
- }
+ else if(stable)
+ __gnu_sequential::stable_sort(begin, end, comp);
+ else
+ __gnu_sequential::sort(begin, end, comp);
+ }
}
} // end namespace __gnu_parallel
diff --git a/libstdc++-v3/include/parallel/tags.h b/libstdc++-v3/include/parallel/tags.h
index b3f2ec8..f57add9 100644
--- a/libstdc++-v3/include/parallel/tags.h
+++ b/libstdc++-v3/include/parallel/tags.h
@@ -44,6 +44,9 @@ namespace __gnu_parallel
/** @brief Forces sequential execution at compile time. */
struct sequential_tag { };
+ /** @brief Forces exact splitting in multiway merge at compile time. */
+ struct exact_tag { };
+
/** @brief Recommends parallel execution at compile time. */
struct parallel_tag { };
diff --git a/libstdc++-v3/include/parallel/types.h b/libstdc++-v3/include/parallel/types.h
index ded617e..1b646b0 100644
--- a/libstdc++-v3/include/parallel/types.h
+++ b/libstdc++-v3/include/parallel/types.h
@@ -87,15 +87,10 @@ namespace __gnu_parallel
/// Merging algorithms:
// bubblesort-alike, loser-tree variants, enum sentinel.
enum _MultiwayMergeAlgorithm
- {
- BUBBLE,
- LOSER_TREE_EXPLICIT,
- LOSER_TREE,
- LOSER_TREE_COMBINED,
- LOSER_TREE_SENTINEL,
- ENUM_SENTINEL
+ {
+ LOSER_TREE
};
-
+
/// Partial sum algorithms: recursive, linear.
enum _PartialSumAlgorithm
{
diff --git a/libstdc++-v3/testsuite/25_algorithms/sort/35588.cc b/libstdc++-v3/testsuite/25_algorithms/sort/35588.cc
new file mode 100644
index 0000000..715fa3b
--- /dev/null
+++ b/libstdc++-v3/testsuite/25_algorithms/sort/35588.cc
@@ -0,0 +1,32 @@
+// Copyright (C) 2008 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 2, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING. If not, write to the Free
+// Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
+// USA.
+
+#include <algorithm>
+#include <functional>
+#include <tr1/functional>
+
+// libstdc++/35588
+int main()
+{
+ using namespace std;
+ using namespace tr1;
+ using namespace placeholders;
+
+ int t[10];
+ sort(t, t+10, bind(less<int>(), _1, _2));
+}