aboutsummaryrefslogtreecommitdiff
path: root/libstdc++-v3
diff options
context:
space:
mode:
authorJohannes Singler <singler@ira.uka.de>2007-11-22 10:13:08 +0000
committerJohannes Singler <singler@gcc.gnu.org>2007-11-22 10:13:08 +0000
commite683ee2a20b907e9b225dd83cd047331e87a7edf (patch)
treec5838738b1b4d3a360f76e4418890226a450834a /libstdc++-v3
parent7861a5ce14376a46409d83eeebef942be4030fe4 (diff)
downloadgcc-e683ee2a20b907e9b225dd83cd047331e87a7edf.zip
gcc-e683ee2a20b907e9b225dd83cd047331e87a7edf.tar.gz
gcc-e683ee2a20b907e9b225dd83cd047331e87a7edf.tar.bz2
re PR libstdc++/33893 ([parallel mode] Algorithms rely on omp_set_dynamic(false))
2007-11-22 Johannes Singler <singler@ira.uka.de> PR libstdc++/33893 * include/parallel/multiway_merge.h: made omp_dynamic-safe * include/parallel/workstealing.h: made omp_dynamic-safe * include/parallel/base.h: infrastructure, cleanup * include/parallel/par_loop.h: made omp_dynamic-safe * include/parallel/features.h: activate loser tree variant * include/parallel/quicksort.h: made omp_dynamic-safe * include/parallel/compiletime_settings.h: settings overridable * include/parallel/equally_split.h: made omp_dynamic-safe * include/parallel/omp_loop_static.h: made omp_dynamic-safe * include/parallel/random_shuffle.h: made omp_dynamic-safe * include/parallel/balanced_quicksort.h: made omp_dynamic-safe * include/parallel/set_operations.h: made omp_dynamic-safe * include/parallel/unique_copy.h: made omp_dynamic-safe * include/parallel/multiway_mergesort.h: made omp_dynamic-safe * include/parallel/search.h: made omp_dynamic-safe * include/parallel/partition.h: made omp_dynamic-safe * include/parallel/partial_sum.h: made omp_dynamic-safe * include/parallel/find.h: made omp_dynamic-safe * include/parallel/omp_loop.h: made omp_dynamic-safe * include/parallel/losertree.h: avoid default constructor From-SVN: r130347
Diffstat (limited to 'libstdc++-v3')
-rw-r--r--libstdc++-v3/ChangeLog24
-rw-r--r--libstdc++-v3/include/parallel/balanced_quicksort.h412
-rw-r--r--libstdc++-v3/include/parallel/base.h483
-rw-r--r--libstdc++-v3/include/parallel/compiletime_settings.h23
-rw-r--r--libstdc++-v3/include/parallel/equally_split.h62
-rw-r--r--libstdc++-v3/include/parallel/features.h2
-rw-r--r--libstdc++-v3/include/parallel/find.h458
-rw-r--r--libstdc++-v3/include/parallel/losertree.h849
-rw-r--r--libstdc++-v3/include/parallel/multiway_merge.h1797
-rw-r--r--libstdc++-v3/include/parallel/multiway_mergesort.h403
-rw-r--r--libstdc++-v3/include/parallel/omp_loop.h92
-rw-r--r--libstdc++-v3/include/parallel/omp_loop_static.h62
-rw-r--r--libstdc++-v3/include/parallel/par_loop.h101
-rw-r--r--libstdc++-v3/include/parallel/partial_sum.h227
-rw-r--r--libstdc++-v3/include/parallel/partition.h520
-rw-r--r--libstdc++-v3/include/parallel/quicksort.h72
-rw-r--r--libstdc++-v3/include/parallel/random_shuffle.h393
-rw-r--r--libstdc++-v3/include/parallel/search.h108
-rw-r--r--libstdc++-v3/include/parallel/set_operations.h545
-rw-r--r--libstdc++-v3/include/parallel/unique_copy.h199
-rw-r--r--libstdc++-v3/include/parallel/workstealing.h386
21 files changed, 3914 insertions, 3304 deletions
diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog
index dbb620c..7415d55 100644
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,3 +1,27 @@
+2007-11-22 Johannes Singler <singler@ira.uka.de>
+
+ PR libstdc++/33893
+ * include/parallel/multiway_merge.h: made omp_dynamic-safe
+ * include/parallel/workstealing.h: made omp_dynamic-safe
+ * include/parallel/base.h: infrastructure, cleanup
+ * include/parallel/par_loop.h: made omp_dynamic-safe
+ * include/parallel/features.h: activate loser tree variant
+ * include/parallel/quicksort.h: made omp_dynamic-safe
+ * include/parallel/compiletime_settings.h: settings overridable
+ * include/parallel/equally_split.h: made omp_dynamic-safe
+ * include/parallel/omp_loop_static.h: made omp_dynamic-safe
+ * include/parallel/random_shuffle.h: made omp_dynamic-safe
+ * include/parallel/balanced_quicksort.h: made omp_dynamic-safe
+ * include/parallel/set_operations.h: made omp_dynamic-safe
+ * include/parallel/unique_copy.h: made omp_dynamic-safe
+ * include/parallel/multiway_mergesort.h: made omp_dynamic-safe
+ * include/parallel/search.h: made omp_dynamic-safe
+ * include/parallel/partition.h: made omp_dynamic-safe
+ * include/parallel/partial_sum.h: made omp_dynamic-safe
+ * include/parallel/find.h: made omp_dynamic-safe
+ * include/parallel/omp_loop.h: made omp_dynamic-safe
+ * include/parallel/losertree.h: avoid default constructor
+
2007-11-21 Jonathan Wakely <jwakely.gcc@gmail.com>
* docs/html/17_intro/C++STYLE: Fix typos.
diff --git a/libstdc++-v3/include/parallel/balanced_quicksort.h b/libstdc++-v3/include/parallel/balanced_quicksort.h
index c282770..0d845ce 100644
--- a/libstdc++-v3/include/parallel/balanced_quicksort.h
+++ b/libstdc++-v3/include/parallel/balanced_quicksort.h
@@ -63,15 +63,15 @@
namespace __gnu_parallel
{
- /** @brief Information local to one thread in the parallel quicksort run. */
- template<typename RandomAccessIterator>
+/** @brief Information local to one thread in the parallel quicksort run. */
+template<typename RandomAccessIterator>
struct QSBThreadLocal
{
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::difference_type difference_type;
/** @brief Continuous part of the sequence, described by an
- iterator pair. */
+ iterator pair. */
typedef std::pair<RandomAccessIterator, RandomAccessIterator> Piece;
/** @brief Initial piece to work on. */
@@ -94,29 +94,17 @@ namespace __gnu_parallel
QSBThreadLocal(int queue_size) : leftover_parts(queue_size) { }
};
- /** @brief Initialize the thread local storage.
- * @param tls Array of thread-local storages.
- * @param queue_size Size of the work-stealing queue. */
- template<typename RandomAccessIterator>
- inline void
- qsb_initialize(QSBThreadLocal<RandomAccessIterator>** tls, int queue_size)
- {
- int iam = omp_get_thread_num();
- tls[iam] = new QSBThreadLocal<RandomAccessIterator>(queue_size);
- }
-
-
- /** @brief Balanced quicksort divide step.
- * @param begin Begin iterator of subsequence.
- * @param end End iterator of subsequence.
- * @param comp Comparator.
- * @param num_threads Number of threads that are allowed to work on
- * this part.
- * @pre @c (end-begin)>=1 */
- template<typename RandomAccessIterator, typename Comparator>
+/** @brief Balanced quicksort divide step.
+ * @param begin Begin iterator of subsequence.
+ * @param end End iterator of subsequence.
+ * @param comp Comparator.
+ * @param num_threads Number of threads that are allowed to work on
+ * this part.
+ * @pre @c (end-begin)>=1 */
+template<typename RandomAccessIterator, typename Comparator>
inline typename std::iterator_traits<RandomAccessIterator>::difference_type
qsb_divide(RandomAccessIterator begin, RandomAccessIterator end,
- Comparator comp, int num_threads)
+ Comparator comp, thread_index_t num_threads)
{
_GLIBCXX_PARALLEL_ASSERT(num_threads > 0);
@@ -124,18 +112,20 @@ namespace __gnu_parallel
typedef typename traits_type::value_type value_type;
typedef typename traits_type::difference_type difference_type;
- RandomAccessIterator pivot_pos = median_of_three_iterators(begin, begin + (end - begin) / 2, end - 1, comp);
+ RandomAccessIterator pivot_pos = median_of_three_iterators(
+ begin, begin + (end - begin) / 2, end - 1, comp);
#if defined(_GLIBCXX_ASSERTIONS)
// Must be in between somewhere.
difference_type n = end - begin;
- _GLIBCXX_PARALLEL_ASSERT((!comp(*pivot_pos, *begin) && !comp(*(begin + n / 2), *pivot_pos))
- || (!comp(*pivot_pos, *begin) && !comp(*end, *pivot_pos))
- || (!comp(*pivot_pos, *(begin + n / 2)) && !comp(*begin, *pivot_pos))
- || (!comp(*pivot_pos, *(begin + n / 2)) && !comp(*end, *pivot_pos))
- || (!comp(*pivot_pos, *end) && !comp(*begin, *pivot_pos))
- || (!comp(*pivot_pos, *end) && !comp(*(begin + n / 2), *pivot_pos)));
+ _GLIBCXX_PARALLEL_ASSERT(
+ (!comp(*pivot_pos, *begin) && !comp(*(begin + n / 2), *pivot_pos))
+ || (!comp(*pivot_pos, *begin) && !comp(*end, *pivot_pos))
+ || (!comp(*pivot_pos, *(begin + n / 2)) && !comp(*begin, *pivot_pos))
+ || (!comp(*pivot_pos, *(begin + n / 2)) && !comp(*end, *pivot_pos))
+ || (!comp(*pivot_pos, *end) && !comp(*begin, *pivot_pos))
+ || (!comp(*pivot_pos, *end) && !comp(*(begin + n / 2), *pivot_pos)));
#endif
// Swap pivot value to end.
@@ -143,10 +133,12 @@ namespace __gnu_parallel
std::swap(*pivot_pos, *(end - 1));
pivot_pos = end - 1;
- __gnu_parallel::binder2nd<Comparator, value_type, value_type, bool> pred(comp, *pivot_pos);
+ __gnu_parallel::binder2nd<Comparator, value_type, value_type, bool>
+ pred(comp, *pivot_pos);
// Divide, returning end - begin - 1 in the worst case.
- difference_type split_pos = parallel_partition(begin, end - 1, pred, num_threads);
+ difference_type split_pos = parallel_partition(
+ begin, end - 1, pred, num_threads);
// Swap back pivot to middle.
std::swap(*(begin + split_pos), *pivot_pos);
@@ -163,18 +155,21 @@ namespace __gnu_parallel
return split_pos;
}
- /** @brief Quicksort conquer step.
- * @param tls Array of thread-local storages.
- * @param begin Begin iterator of subsequence.
- * @param end End iterator of subsequence.
- * @param comp Comparator.
- * @param iam Number of the thread processing this function.
- * @param num_threads Number of threads that are allowed to work on this part. */
- template<typename RandomAccessIterator, typename Comparator>
+/** @brief Quicksort conquer step.
+ * @param tls Array of thread-local storages.
+ * @param begin Begin iterator of subsequence.
+ * @param end End iterator of subsequence.
+ * @param comp Comparator.
+ * @param iam Number of the thread processing this function.
+ * @param num_threads
+ * Number of threads that are allowed to work on this part. */
+template<typename RandomAccessIterator, typename Comparator>
inline void
qsb_conquer(QSBThreadLocal<RandomAccessIterator>** tls,
- RandomAccessIterator begin, RandomAccessIterator end,
- Comparator comp, thread_index_t iam, thread_index_t num_threads)
+ RandomAccessIterator begin, RandomAccessIterator end,
+ Comparator comp,
+ thread_index_t iam, thread_index_t num_threads,
+ bool parent_wait)
{
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::value_type value_type;
@@ -182,14 +177,14 @@ namespace __gnu_parallel
difference_type n = end - begin;
- if (num_threads <= 1 || n < 2)
+ if (num_threads <= 1 || n <= 1)
{
- tls[iam]->initial.first = begin;
- tls[iam]->initial.second = end;
+ tls[iam]->initial.first = begin;
+ tls[iam]->initial.second = end;
- qsb_local_sort_with_helping(tls, comp, iam);
+ qsb_local_sort_with_helping(tls, comp, iam, parent_wait);
- return;
+ return;
}
// Divide step.
@@ -199,33 +194,55 @@ namespace __gnu_parallel
_GLIBCXX_PARALLEL_ASSERT(0 <= split_pos && split_pos < (end - begin));
#endif
- thread_index_t num_threads_leftside = std::max<thread_index_t>(1, std::min<thread_index_t>(num_threads - 1, split_pos * num_threads / n));
+ thread_index_t num_threads_leftside =
+ std::max<thread_index_t>(1, std::min<thread_index_t>(
+ num_threads - 1, split_pos * num_threads / n));
-#pragma omp atomic
+# pragma omp atomic
*tls[iam]->elements_leftover -= (difference_type)1;
// Conquer step.
-#pragma omp parallel sections num_threads(2)
+# pragma omp parallel num_threads(2)
{
-#pragma omp section
- qsb_conquer(tls, begin, begin + split_pos, comp, iam, num_threads_leftside);
- // The pivot_pos is left in place, to ensure termination.
-#pragma omp section
- qsb_conquer(tls, begin + split_pos + 1, end, comp,
- iam + num_threads_leftside, num_threads - num_threads_leftside);
+ bool wait;
+ if(omp_get_num_threads() < 2)
+ wait = false;
+ else
+ wait = parent_wait;
+
+# pragma omp sections
+ {
+# pragma omp section
+ {
+ qsb_conquer(tls, begin, begin + split_pos, comp,
+ iam,
+ num_threads_leftside,
+ wait);
+ wait = parent_wait;
+ }
+ // The pivot_pos is left in place, to ensure termination.
+# pragma omp section
+ {
+ qsb_conquer(tls, begin + split_pos + 1, end, comp,
+ iam + num_threads_leftside,
+ num_threads - num_threads_leftside,
+ wait);
+ wait = parent_wait;
+ }
+ }
}
}
- /**
- * @brief Quicksort step doing load-balanced local sort.
- * @param tls Array of thread-local storages.
- * @param comp Comparator.
- * @param iam Number of the thread processing this function.
- */
- template<typename RandomAccessIterator, typename Comparator>
+/**
+ * @brief Quicksort step doing load-balanced local sort.
+ * @param tls Array of thread-local storages.
+ * @param comp Comparator.
+ * @param iam Number of the thread processing this function.
+ */
+template<typename RandomAccessIterator, typename Comparator>
inline void
qsb_local_sort_with_helping(QSBThreadLocal<RandomAccessIterator>** tls,
- Comparator& comp, int iam)
+ Comparator& comp, int iam, bool wait)
{
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::value_type value_type;
@@ -251,151 +268,162 @@ namespace __gnu_parallel
for (;;)
{
- // Invariant: current must be a valid (maybe empty) range.
- RandomAccessIterator begin = current.first, end = current.second;
- difference_type n = end - begin;
+ // Invariant: current must be a valid (maybe empty) range.
+ RandomAccessIterator begin = current.first, end = current.second;
+ difference_type n = end - begin;
- if (n > base_case_n)
- {
- // Divide.
- RandomAccessIterator pivot_pos = begin + rng(n);
+ if (n > base_case_n)
+ {
+ // Divide.
+ RandomAccessIterator pivot_pos = begin + rng(n);
- // Swap pivot_pos value to end.
- if (pivot_pos != (end - 1))
- std::swap(*pivot_pos, *(end - 1));
- pivot_pos = end - 1;
+ // Swap pivot_pos value to end.
+ if (pivot_pos != (end - 1))
+ std::swap(*pivot_pos, *(end - 1));
+ pivot_pos = end - 1;
- __gnu_parallel::binder2nd<Comparator, value_type, value_type, bool> pred(comp, *pivot_pos);
+ __gnu_parallel::binder2nd
+ <Comparator, value_type, value_type, bool>
+ pred(comp, *pivot_pos);
- // Divide, leave pivot unchanged in last place.
- RandomAccessIterator split_pos1, split_pos2;
- split_pos1 = __gnu_sequential::partition(begin, end - 1, pred);
+ // Divide, leave pivot unchanged in last place.
+ RandomAccessIterator split_pos1, split_pos2;
+ split_pos1 = __gnu_sequential::partition(begin, end - 1, pred);
- // Left side: < pivot_pos; right side: >= pivot_pos.
+ // Left side: < pivot_pos; right side: >= pivot_pos.
#if _GLIBCXX_ASSERTIONS
- _GLIBCXX_PARALLEL_ASSERT(begin <= split_pos1 && split_pos1 < end);
+ _GLIBCXX_PARALLEL_ASSERT(begin <= split_pos1 && split_pos1 < end);
#endif
- // Swap pivot back to middle.
- if (split_pos1 != pivot_pos)
- std::swap(*split_pos1, *pivot_pos);
- pivot_pos = split_pos1;
-
- // In case all elements are equal, split_pos1 == 0.
- if ((split_pos1 + 1 - begin) < (n >> 7)
- || (end - split_pos1) < (n >> 7))
- {
- // Very unequal split, one part smaller than one 128th
- // elements not strictly larger than the pivot.
- __gnu_parallel::unary_negate<__gnu_parallel::binder1st<Comparator, value_type, value_type, bool>, value_type> pred(__gnu_parallel::binder1st<Comparator, value_type, value_type, bool>(comp, *pivot_pos));
-
- // Find other end of pivot-equal range.
- split_pos2 = __gnu_sequential::partition(split_pos1 + 1, end, pred);
- }
- else
- {
- // Only skip the pivot.
- split_pos2 = split_pos1 + 1;
- }
-
- // Elements equal to pivot are done.
- elements_done += (split_pos2 - split_pos1);
+ // Swap pivot back to middle.
+ if (split_pos1 != pivot_pos)
+ std::swap(*split_pos1, *pivot_pos);
+ pivot_pos = split_pos1;
+
+ // In case all elements are equal, split_pos1 == 0.
+ if ((split_pos1 + 1 - begin) < (n >> 7)
+ || (end - split_pos1) < (n >> 7))
+ {
+ // Very unequal split, one part smaller than one 128th
+ // elements not strictly larger than the pivot.
+ __gnu_parallel::unary_negate<__gnu_parallel::binder1st
+ <Comparator, value_type, value_type, bool>, value_type>
+ pred(__gnu_parallel::binder1st
+ <Comparator, value_type, value_type, bool>(
+ comp, *pivot_pos));
+
+ // Find other end of pivot-equal range.
+ split_pos2 = __gnu_sequential::partition(
+ split_pos1 + 1, end, pred);
+ }
+ else
+ // Only skip the pivot.
+ split_pos2 = split_pos1 + 1;
+
+ // Elements equal to pivot are done.
+ elements_done += (split_pos2 - split_pos1);
#if _GLIBCXX_ASSERTIONS
- total_elements_done += (split_pos2 - split_pos1);
+ total_elements_done += (split_pos2 - split_pos1);
#endif
- // Always push larger part onto stack.
- if (((split_pos1 + 1) - begin) < (end - (split_pos2)))
- {
- // Right side larger.
- if ((split_pos2) != end)
- tl.leftover_parts.push_front(std::make_pair(split_pos2, end));
-
- //current.first = begin; //already set anyway
- current.second = split_pos1;
- continue;
- }
- else
- {
- // Left side larger.
- if (begin != split_pos1)
- tl.leftover_parts.push_front(std::make_pair(begin, split_pos1));
-
- current.first = split_pos2;
- //current.second = end; //already set anyway
- continue;
- }
- }
- else
- {
- __gnu_sequential::sort(begin, end, comp);
- elements_done += n;
+ // Always push larger part onto stack.
+ if (((split_pos1 + 1) - begin) < (end - (split_pos2)))
+ {
+ // Right side larger.
+ if ((split_pos2) != end)
+ tl.leftover_parts.push_front(std::make_pair(split_pos2, end));
+
+ //current.first = begin; //already set anyway
+ current.second = split_pos1;
+ continue;
+ }
+ else
+ {
+ // Left side larger.
+ if (begin != split_pos1)
+ tl.leftover_parts.push_front(
+ std::make_pair(begin, split_pos1));
+
+ current.first = split_pos2;
+ //current.second = end; //already set anyway
+ continue;
+ }
+ }
+ else
+ {
+ __gnu_sequential::sort(begin, end, comp);
+ elements_done += n;
#if _GLIBCXX_ASSERTIONS
- total_elements_done += n;
+ total_elements_done += n;
#endif
- // Prefer own stack, small pieces.
- if (tl.leftover_parts.pop_front(current))
- continue;
+ // Prefer own stack, small pieces.
+ if (tl.leftover_parts.pop_front(current))
+ continue;
-#pragma omp atomic
- *tl.elements_leftover -= elements_done;
- elements_done = 0;
+# pragma omp atomic
+ *tl.elements_leftover -= elements_done;
+
+ elements_done = 0;
#if _GLIBCXX_ASSERTIONS
- double search_start = omp_get_wtime();
+ double search_start = omp_get_wtime();
#endif
- // Look for new work.
- bool success = false;
- while (*tl.elements_leftover > 0 && !success
+ // Look for new work.
+ bool successfully_stolen = false;
+ while (wait && *tl.elements_leftover > 0 && !successfully_stolen
#if _GLIBCXX_ASSERTIONS
- // Possible dead-lock.
- && (omp_get_wtime() < (search_start + 1.0))
+ // Possible dead-lock.
+ && (omp_get_wtime() < (search_start + 1.0))
#endif
- )
- {
- thread_index_t victim;
- victim = rng(num_threads);
-
- // Large pieces.
- success = (victim != iam) && tls[victim]->leftover_parts.pop_back(current);
- if (!success)
- yield();
+ )
+ {
+ thread_index_t victim;
+ victim = rng(num_threads);
+
+ // Large pieces.
+ successfully_stolen = (victim != iam)
+ && tls[victim]->leftover_parts.pop_back(current);
+ if (!successfully_stolen)
+ yield();
#if !defined(__ICC) && !defined(__ECC)
-#pragma omp flush
+# pragma omp flush
#endif
- }
+ }
#if _GLIBCXX_ASSERTIONS
- if (omp_get_wtime() >= (search_start + 1.0))
- {
- sleep(1);
- _GLIBCXX_PARALLEL_ASSERT(omp_get_wtime() < (search_start + 1.0));
- }
+ if (omp_get_wtime() >= (search_start + 1.0))
+ {
+ sleep(1);
+ _GLIBCXX_PARALLEL_ASSERT(
+ omp_get_wtime() < (search_start + 1.0));
+ }
#endif
- if (!success)
- {
+ if (!successfully_stolen)
+ {
#if _GLIBCXX_ASSERTIONS
- _GLIBCXX_PARALLEL_ASSERT(*tl.elements_leftover == 0);
+ _GLIBCXX_PARALLEL_ASSERT(*tl.elements_leftover == 0);
#endif
- return;
- }
- }
+ return;
+ }
+ }
}
}
- /** @brief Top-level quicksort routine.
- * @param begin Begin iterator of sequence.
- * @param end End iterator of sequence.
- * @param comp Comparator.
- * @param n Length of the sequence to sort.
- * @param num_threads Number of threads that are allowed to work on
- * this part.
- */
- template<typename RandomAccessIterator, typename Comparator>
+/** @brief Top-level quicksort routine.
+ * @param begin Begin iterator of sequence.
+ * @param end End iterator of sequence.
+ * @param comp Comparator.
+ * @param n Length of the sequence to sort.
+ * @param num_threads Number of threads that are allowed to work on
+ * this part.
+ */
+template<typename RandomAccessIterator, typename Comparator>
inline void
parallel_sort_qsb(RandomAccessIterator begin, RandomAccessIterator end,
- Comparator comp,
- typename std::iterator_traits<RandomAccessIterator>::difference_type n, int num_threads)
+ Comparator comp,
+ typename std::iterator_traits<RandomAccessIterator>
+ ::difference_type n,
+ thread_index_t num_threads)
{
_GLIBCXX_CALL(end - begin)
@@ -413,11 +441,11 @@ namespace __gnu_parallel
if (num_threads > n)
num_threads = static_cast<thread_index_t>(n);
+ // Initialize thread local storage
tls_type** tls = new tls_type*[num_threads];
-
-#pragma omp parallel num_threads(num_threads)
- // Initialize variables per processor.
- qsb_initialize(tls, num_threads * (thread_index_t)(log2(n) + 1));
+ difference_type queue_size = num_threads * (thread_index_t)(log2(n) + 1);
+ for (thread_index_t t = 0; t < num_threads; ++t)
+ tls[t] = new QSBThreadLocal<RandomAccessIterator>(queue_size);
// There can never be more than ceil(log2(n)) ranges on the stack, because
// 1. Only one processor pushes onto the stack
@@ -426,22 +454,16 @@ namespace __gnu_parallel
volatile difference_type elements_leftover = n;
for (int i = 0; i < num_threads; i++)
{
- tls[i]->elements_leftover = &elements_leftover;
- tls[i]->num_threads = num_threads;
- tls[i]->global = std::make_pair(begin, end);
+ tls[i]->elements_leftover = &elements_leftover;
+ tls[i]->num_threads = num_threads;
+ tls[i]->global = std::make_pair(begin, end);
- // Just in case nothing is left to assign.
- tls[i]->initial = std::make_pair(end, end);
+ // Just in case nothing is left to assign.
+ tls[i]->initial = std::make_pair(end, end);
}
- // Initial splitting, recursively.
- int old_nested = omp_get_nested();
- omp_set_nested(true);
-
// Main recursion call.
- qsb_conquer(tls, begin, begin + n, comp, 0, num_threads);
-
- omp_set_nested(old_nested);
+ qsb_conquer(tls, begin, begin + n, comp, 0, num_threads, true);
#if _GLIBCXX_ASSERTIONS
// All stack must be empty.
diff --git a/libstdc++-v3/include/parallel/base.h b/libstdc++-v3/include/parallel/base.h
index 0a86d83..5a756cd 100644
--- a/libstdc++-v3/include/parallel/base.h
+++ b/libstdc++-v3/include/parallel/base.h
@@ -49,54 +49,70 @@ namespace __gnu_parallel
// XXX remove std::duplicates from here if possible,
// XXX but keep minimal dependencies.
- /** @brief Calculates the rounded-down logarithm of @c n for base 2.
- * @param n Argument.
- * @return Returns 0 for argument 0.
- */
- template<typename Size>
- inline Size
- log2(Size n)
+/** @brief Calculates the rounded-down logarithm of @c n for base 2.
+ * @param n Argument.
+ * @return Returns 0 for argument 0.
+ */
+template<typename Size>
+ inline Size
+ log2(Size n)
{
Size k;
for (k = 0; n != 1; n >>= 1)
- ++k;
+ ++k;
return k;
}
- /** @brief Encode two integers into one __gnu_parallel::lcas_t.
- * @param a First integer, to be encoded in the most-significant @c
- * lcas_t_bits/2 bits.
- * @param b Second integer, to be encoded in the least-significant
- * @c lcas_t_bits/2 bits.
- * @return __gnu_parallel::lcas_t value encoding @c a and @c b.
- * @see decode2
- */
- inline lcas_t
- encode2(int a, int b) //must all be non-negative, actually
+/** @brief Encode two integers into one __gnu_parallel::lcas_t.
+ * @param a First integer, to be encoded in the most-significant @c
+ * lcas_t_bits/2 bits.
+ * @param b Second integer, to be encoded in the least-significant
+ * @c lcas_t_bits/2 bits.
+ * @return __gnu_parallel::lcas_t value encoding @c a and @c b.
+ * @see decode2
+ */
+inline lcas_t
+encode2(int a, int b) //must all be non-negative, actually
+{
+ return (((lcas_t)a) << (lcas_t_bits / 2)) | (((lcas_t)b) << 0);
+}
+
+/** @brief Decode two integers from one __gnu_parallel::lcas_t.
+ * @param x __gnu_parallel::lcas_t to decode integers from.
+ * @param a First integer, to be decoded from the most-significant
+ * @c lcas_t_bits/2 bits of @c x.
+ * @param b Second integer, to be encoded in the least-significant
+ * @c lcas_t_bits/2 bits of @c x.
+ * @see encode2
+ */
+inline void
+decode2(lcas_t x, int& a, int& b)
+{
+ a = (int)((x >> (lcas_t_bits / 2)) & lcas_t_mask);
+ b = (int)((x >> 0 ) & lcas_t_mask);
+}
+
+/** @brief Equivalent to std::min. */
+template<typename T>
+ const T&
+ min(const T& a, const T& b)
{
- return (((lcas_t)a) << (lcas_t_bits / 2)) | (((lcas_t)b) << 0);
- }
+ return (a < b) ? a : b;
+ };
- /** @brief Decode two integers from one __gnu_parallel::lcas_t.
- * @param x __gnu_parallel::lcas_t to decode integers from.
- * @param a First integer, to be decoded from the most-significant
- * @c lcas_t_bits/2 bits of @c x.
- * @param b Second integer, to be encoded in the least-significant
- * @c lcas_t_bits/2 bits of @c x.
- * @see encode2
- */
- inline void
- decode2(lcas_t x, int& a, int& b)
+/** @brief Equivalent to std::max. */
+template<typename T>
+ const T&
+ max(const T& a, const T& b)
{
- a = (int)((x >> (lcas_t_bits / 2)) & lcas_t_mask);
- b = (int)((x >> 0 ) & lcas_t_mask);
- }
+ return (a > b) ? a : b;
+ };
- /** @brief Constructs predicate for equality from strict weak
- * ordering predicate
- */
- // XXX comparator at the end, as per others
- template<typename Comparator, typename T1, typename T2>
+/** @brief Constructs predicate for equality from strict weak
+ * ordering predicate
+ */
+// XXX comparator at the end, as per others
+template<typename Comparator, typename T1, typename T2>
class equal_from_less : public std::binary_function<T1, T2, bool>
{
private:
@@ -112,162 +128,176 @@ namespace __gnu_parallel
};
- /** @brief Similar to std::binder1st, but giving the argument types explicitly. */
- template<typename _Predicate, typename argument_type>
- class unary_negate
- : public std::unary_function<argument_type, bool>
- {
- protected:
- _Predicate _M_pred;
-
- public:
- explicit
- unary_negate(const _Predicate& __x) : _M_pred(__x) { }
-
- bool
- operator()(const argument_type& __x)
- { return !_M_pred(__x); }
- };
-
- /** @brief Similar to std::binder1st, but giving the argument types explicitly. */
- template<typename _Operation, typename first_argument_type, typename second_argument_type, typename result_type>
- class binder1st
- : public std::unary_function<second_argument_type, result_type>
- {
- protected:
- _Operation op;
- first_argument_type value;
-
- public:
- binder1st(const _Operation& __x,
- const first_argument_type& __y)
- : op(__x), value(__y) { }
-
- result_type
- operator()(const second_argument_type& __x)
- { return op(value, __x); }
-
- // _GLIBCXX_RESOLVE_LIB_DEFECTS
- // 109. Missing binders for non-const sequence elements
- result_type
- operator()(second_argument_type& __x) const
- { return op(value, __x); }
- };
-
- /**
- * @brief Similar to std::binder2nd, but giving the argument types
- * explicitly.
- */
- template<typename _Operation, typename first_argument_type, typename second_argument_type, typename result_type>
- class binder2nd
- : public std::unary_function<first_argument_type, result_type>
- {
- protected:
- _Operation op;
- second_argument_type value;
-
- public:
- binder2nd(const _Operation& __x,
- const second_argument_type& __y)
- : op(__x), value(__y) { }
-
- result_type
- operator()(const first_argument_type& __x) const
- { return op(__x, value); }
-
- // _GLIBCXX_RESOLVE_LIB_DEFECTS
- // 109. Missing binders for non-const sequence elements
- result_type
- operator()(first_argument_type& __x)
- { return op(__x, value); }
- };
-
- /** @brief Similar to std::equal_to, but allows two different types. */
- template<typename T1, typename T2>
+/** @brief Similar to std::binder1st,
+ * but giving the argument types explicitly. */
+template<typename _Predicate, typename argument_type>
+ class unary_negate
+ : public std::unary_function<argument_type, bool>
+ {
+ protected:
+ _Predicate _M_pred;
+
+ public:
+ explicit
+ unary_negate(const _Predicate& __x) : _M_pred(__x) { }
+
+ bool
+ operator()(const argument_type& __x)
+ { return !_M_pred(__x); }
+ };
+
+/** @brief Similar to std::binder1st,
+ * but giving the argument types explicitly. */
+template<
+ typename _Operation,
+ typename first_argument_type,
+ typename second_argument_type,
+ typename result_type>
+ class binder1st
+ : public std::unary_function<second_argument_type, result_type>
+ {
+ protected:
+ _Operation op;
+ first_argument_type value;
+
+ public:
+ binder1st(const _Operation& __x,
+ const first_argument_type& __y)
+ : op(__x), value(__y) { }
+
+ result_type
+ operator()(const second_argument_type& __x)
+ { return op(value, __x); }
+
+ // _GLIBCXX_RESOLVE_LIB_DEFECTS
+ // 109. Missing binders for non-const sequence elements
+ result_type
+ operator()(second_argument_type& __x) const
+ { return op(value, __x); }
+ };
+
+/**
+ * @brief Similar to std::binder2nd, but giving the argument types
+ * explicitly.
+ */
+template<
+ typename _Operation,
+ typename first_argument_type,
+ typename second_argument_type,
+ typename result_type>
+ class binder2nd
+ : public std::unary_function<first_argument_type, result_type>
+ {
+ protected:
+ _Operation op;
+ second_argument_type value;
+
+ public:
+ binder2nd(const _Operation& __x,
+ const second_argument_type& __y)
+ : op(__x), value(__y) { }
+
+ result_type
+ operator()(const first_argument_type& __x) const
+ { return op(__x, value); }
+
+ // _GLIBCXX_RESOLVE_LIB_DEFECTS
+ // 109. Missing binders for non-const sequence elements
+ result_type
+ operator()(first_argument_type& __x)
+ { return op(__x, value); }
+ };
+
+/** @brief Similar to std::equal_to, but allows two different types. */
+template<typename T1, typename T2>
struct equal_to : std::binary_function<T1, T2, bool>
{
bool operator()(const T1& t1, const T2& t2) const
{ return t1 == t2; }
};
- /** @brief Similar to std::less, but allows two different types. */
- template<typename T1, typename T2>
+/** @brief Similar to std::less, but allows two different types. */
+template<typename T1, typename T2>
struct less : std::binary_function<T1, T2, bool>
{
- bool
+ bool
operator()(const T1& t1, const T2& t2) const
{ return t1 < t2; }
- bool
+ bool
operator()(const T2& t2, const T1& t1) const
{ return t2 < t1; }
};
- // Partial specialization for one type. Same as std::less.
- template<typename _Tp>
- struct less<_Tp, _Tp> : public std::binary_function<_Tp, _Tp, bool>
- {
- bool
- operator()(const _Tp& __x, const _Tp& __y) const
- { return __x < __y; }
- };
+// Partial specialization for one type. Same as std::less.
+template<typename _Tp>
+struct less<_Tp, _Tp> : public std::binary_function<_Tp, _Tp, bool>
+ {
+ bool
+ operator()(const _Tp& __x, const _Tp& __y) const
+ { return __x < __y; }
+ };
- /** @brief Similar to std::plus, but allows two different types. */
- template<typename _Tp1, typename _Tp2>
- struct plus : public std::binary_function<_Tp1, _Tp2, _Tp1>
- {
- typedef typeof(*static_cast<_Tp1*>(NULL) + *static_cast<_Tp2*>(NULL)) result;
+ /** @brief Similar to std::plus, but allows two different types. */
+template<typename _Tp1, typename _Tp2>
+ struct plus : public std::binary_function<_Tp1, _Tp2, _Tp1>
+ {
+ typedef typeof(*static_cast<_Tp1*>(NULL)
+ + *static_cast<_Tp2*>(NULL)) result;
- result
- operator()(const _Tp1& __x, const _Tp2& __y) const
- { return __x + __y; }
- };
+ result
+ operator()(const _Tp1& __x, const _Tp2& __y) const
+ { return __x + __y; }
+ };
- // Partial specialization for one type. Same as std::plus.
- template<typename _Tp>
- struct plus<_Tp, _Tp> : public std::binary_function<_Tp, _Tp, _Tp>
- {
- typedef typeof(*static_cast<_Tp*>(NULL) + *static_cast<_Tp*>(NULL)) result;
+// Partial specialization for one type. Same as std::plus.
+template<typename _Tp>
+ struct plus<_Tp, _Tp> : public std::binary_function<_Tp, _Tp, _Tp>
+ {
+ typedef typeof(*static_cast<_Tp*>(NULL)
+ + *static_cast<_Tp*>(NULL)) result;
- result
- operator()(const _Tp& __x, const _Tp& __y) const
- { return __x + __y; }
- };
+ result
+ operator()(const _Tp& __x, const _Tp& __y) const
+ { return __x + __y; }
+ };
- /** @brief Similar to std::multiplies, but allows two different types. */
- template<typename _Tp1, typename _Tp2>
- struct multiplies : public std::binary_function<_Tp1, _Tp2, _Tp1>
- {
- typedef typeof(*static_cast<_Tp1*>(NULL) * *static_cast<_Tp2*>(NULL)) result;
+/** @brief Similar to std::multiplies, but allows two different types. */
+template<typename _Tp1, typename _Tp2>
+ struct multiplies : public std::binary_function<_Tp1, _Tp2, _Tp1>
+ {
+ typedef typeof(*static_cast<_Tp1*>(NULL)
+ * *static_cast<_Tp2*>(NULL)) result;
- result
- operator()(const _Tp1& __x, const _Tp2& __y) const
- { return __x * __y; }
- };
+ result
+ operator()(const _Tp1& __x, const _Tp2& __y) const
+ { return __x * __y; }
+ };
- // Partial specialization for one type. Same as std::multiplies.
- template<typename _Tp>
- struct multiplies<_Tp, _Tp> : public std::binary_function<_Tp, _Tp, _Tp>
- {
- typedef typeof(*static_cast<_Tp*>(NULL) * *static_cast<_Tp*>(NULL)) result;
+// Partial specialization for one type. Same as std::multiplies.
+template<typename _Tp>
+ struct multiplies<_Tp, _Tp> : public std::binary_function<_Tp, _Tp, _Tp>
+ {
+ typedef typeof(*static_cast<_Tp*>(NULL)
+ * *static_cast<_Tp*>(NULL)) result;
- result
- operator()(const _Tp& __x, const _Tp& __y) const
- { return __x * __y; }
- };
+ result
+ operator()(const _Tp& __x, const _Tp& __y) const
+ { return __x * __y; }
+ };
- template<typename T, typename _DifferenceTp>
+template<typename T, typename _DifferenceTp>
class pseudo_sequence;
- /** @brief Iterator associated with __gnu_parallel::pseudo_sequence.
- * If features the usual random-access iterator functionality.
- * @param T Sequence value type.
- * @param difference_type Sequence difference type.
- */
- template<typename T, typename _DifferenceTp>
+/** @brief Iterator associated with __gnu_parallel::pseudo_sequence.
+ * If features the usual random-access iterator functionality.
+ * @param T Sequence value type.
+ * @param difference_type Sequence difference type.
+ */
+template<typename T, typename _DifferenceTp>
class pseudo_sequence_iterator
{
public:
@@ -296,34 +326,34 @@ namespace __gnu_parallel
operator++(int)
{ return type(pos++); }
- const T&
+ const T&
operator*() const
{ return val; }
- const T&
+ const T&
operator[](difference_type) const
{ return val; }
- bool
+ bool
operator==(const type& i2)
{ return pos == i2.pos; }
- difference_type
+ difference_type
operator!=(const type& i2)
{ return pos != i2.pos; }
- difference_type
+ difference_type
operator-(const type& i2)
{ return pos - i2.pos; }
};
- /** @brief Sequence that conceptually consists of multiple copies of
- the same element.
- * The copies are not stored explicitly, of course.
- * @param T Sequence value type.
- * @param difference_type Sequence difference type.
- */
- template<typename T, typename _DifferenceTp>
+/** @brief Sequence that conceptually consists of multiple copies of
+ the same element.
+ * The copies are not stored explicitly, of course.
+ * @param T Sequence value type.
+ * @param difference_type Sequence difference type.
+ */
+template<typename T, typename _DifferenceTp>
class pseudo_sequence
{
typedef pseudo_sequence<T, _DifferenceTp> type;
@@ -335,10 +365,10 @@ namespace __gnu_parallel
typedef pseudo_sequence_iterator<T, uint64> iterator;
/** @brief Constructor.
- * @param val Element of the sequence.
- * @param count Number of (virtual) copies.
- */
- pseudo_sequence(const T& val, difference_type count)
+ * @param val Element of the sequence.
+ * @param count Number of (virtual) copies.
+ */
+ pseudo_sequence(const T& val, difference_type count)
: val(val), count(count) { }
/** @brief Begin iterator. */
@@ -356,67 +386,66 @@ namespace __gnu_parallel
difference_type count;
};
- /** @brief Functor that does nothing */
- template<typename _ValueTp>
+/** @brief Functor that does nothing */
+template<typename _ValueTp>
class void_functor
{
- inline void
+ inline void
operator()(const _ValueTp& v) const { }
};
- /** @brief Compute the median of three referenced elements,
- according to @c comp.
- * @param a First iterator.
- * @param b Second iterator.
- * @param c Third iterator.
- * @param comp Comparator.
- */
- template<typename RandomAccessIterator, typename Comparator>
- RandomAccessIterator
- median_of_three_iterators(RandomAccessIterator a, RandomAccessIterator b,
- RandomAccessIterator c, Comparator& comp)
+/** @brief Compute the median of three referenced elements,
+ according to @c comp.
+ * @param a First iterator.
+ * @param b Second iterator.
+ * @param c Third iterator.
+ * @param comp Comparator.
+ */
+template<typename RandomAccessIterator, typename Comparator>
+RandomAccessIterator
+ median_of_three_iterators(RandomAccessIterator a, RandomAccessIterator b,
+ RandomAccessIterator c, Comparator& comp)
{
if (comp(*a, *b))
if (comp(*b, *c))
- return b;
+ return b;
else
- if (comp(*a, *c))
- return c;
- else
- return a;
+ if (comp(*a, *c))
+ return c;
+ else
+ return a;
else
{
- // Just swap a and b.
- if (comp(*a, *c))
- return a;
- else
- if (comp(*b, *c))
- return c;
- else
- return b;
+ // Just swap a and b.
+ if (comp(*a, *c))
+ return a;
+ else
+ if (comp(*b, *c))
+ return c;
+ else
+ return b;
}
}
- // Avoid the use of assert, because we're trying to keep the <cassert>
- // include out of the mix. (Same as debug mode).
- inline void
- __replacement_assert(const char* __file, int __line,
- const char* __function, const char* __condition)
- {
- std::printf("%s:%d: %s: Assertion '%s' failed.\n", __file, __line,
- __function, __condition);
- __builtin_abort();
- }
-
+// Avoid the use of assert, because we're trying to keep the <cassert>
+// include out of the mix. (Same as debug mode).
+inline void
+__replacement_assert(const char* __file, int __line,
+ const char* __function, const char* __condition)
+{
+ std::printf("%s:%d: %s: Assertion '%s' failed.\n", __file, __line,
+ __function, __condition);
+ __builtin_abort();
+}
+
#define _GLIBCXX_PARALLEL_ASSERT(_Condition) \
- do \
- { \
- if (!(_Condition)) \
- __gnu_parallel::__replacement_assert(__FILE__, __LINE__, \
- __PRETTY_FUNCTION__, #_Condition); \
- } while (false)
-
+do \
+ { \
+ if (!(_Condition)) \
+ __gnu_parallel::__replacement_assert(__FILE__, __LINE__, \
+ __PRETTY_FUNCTION__, #_Condition); \
+ } while (false)
+
} //namespace __gnu_parallel
#endif
-
diff --git a/libstdc++-v3/include/parallel/compiletime_settings.h b/libstdc++-v3/include/parallel/compiletime_settings.h
index 717bda5..edaea38 100644
--- a/libstdc++-v3/include/parallel/compiletime_settings.h
+++ b/libstdc++-v3/include/parallel/compiletime_settings.h
@@ -39,7 +39,7 @@
#include <cstdio>
/** @brief Determine verbosity level of the parallel mode.
- * Level 1 prints a message each time when entering a parallel-mode function. */
+ * Level 1 prints a message each time a parallel-mode function is entered. */
#define _GLIBCXX_VERBOSE_LEVEL 0
/** @def _GLIBCXX_CALL
@@ -50,27 +50,40 @@
#define _GLIBCXX_CALL(n)
#endif
#if (_GLIBCXX_VERBOSE_LEVEL == 1)
-#define _GLIBCXX_CALL(n) printf(" %s:\niam = %d, n = %ld, num_threads = %d\n", __PRETTY_FUNCTION__, omp_get_thread_num(), (n), get_max_threads());
+#define _GLIBCXX_CALL(n) \
+ printf(" %s:\niam = %d, n = %ld, num_threads = %d\n", \
+ __PRETTY_FUNCTION__, omp_get_thread_num(), (n), get_max_threads());
#endif
+#ifndef _GLIBCXX_SCALE_DOWN_FPU
/** @brief Use floating-point scaling instead of modulo for mapping
* random numbers to a range. This can be faster on certain CPUs. */
#define _GLIBCXX_SCALE_DOWN_FPU 0
+#endif
+#ifndef _GLIBCXX_ASSERTIONS
/** @brief Switch on many _GLIBCXX_PARALLEL_ASSERTions in parallel code.
* Should be switched on only locally. */
#define _GLIBCXX_ASSERTIONS 0
+#endif
+#ifndef _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
/** @brief Switch on many _GLIBCXX_PARALLEL_ASSERTions in parallel code.
- * Consider the size of the L1 cache for __gnu_parallel::parallel_random_shuffle(). */
+ * Consider the size of the L1 cache for
+ * __gnu_parallel::parallel_random_shuffle(). */
#define _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 0
+#endif
+#ifndef _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
/** @brief Switch on many _GLIBCXX_PARALLEL_ASSERTions in parallel code.
- * Consider the size of the TLB for __gnu_parallel::parallel_random_shuffle(). */
+ * Consider the size of the TLB for
+ * __gnu_parallel::parallel_random_shuffle(). */
#define _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB 0
+#endif
+#ifndef _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST
/** @brief First copy the data, sort it locally, and merge it back
* (0); or copy it back after everything is done (1).
*
* Recommendation: 0 */
#define _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST 0
-
+#endif
diff --git a/libstdc++-v3/include/parallel/equally_split.h b/libstdc++-v3/include/parallel/equally_split.h
index 730875d..4c4167e 100644
--- a/libstdc++-v3/include/parallel/equally_split.h
+++ b/libstdc++-v3/include/parallel/equally_split.h
@@ -39,30 +39,58 @@
namespace __gnu_parallel
{
- /** @brief Function to split a sequence into parts of almost equal size.
- *
- * The resulting sequence s of length p+1 contains the splitting
- * positions when splitting the range [0,n) into parts of almost
- * equal size (plus minus 1). The first entry is 0, the last one
- * n. There may result empty parts.
- * @param n Number of elements
- * @param p Number of parts
- * @param s Splitters
- * @returns End of splitter sequence, i. e. @c s+p+1 */
- template<typename _DifferenceTp, typename OutputIterator>
+/** @brief Function to split a sequence into parts of almost equal size.
+ *
+ * The resulting sequence s of length num_threads+1 contains the splitting
+ * positions when splitting the range [0,n) into parts of almost
+ * equal size (plus minus 1). The first entry is 0, the last one
+ * n. There may result empty parts.
+ * @param n Number of elements
+ * @param num_threads Number of parts
+ * @param s Splitters
+ * @returns End of splitter sequence, i. e. @c s+num_threads+1 */
+template<typename difference_type, typename OutputIterator>
OutputIterator
- equally_split(_DifferenceTp n, thread_index_t p, OutputIterator s)
+ equally_split(difference_type n,
+ thread_index_t num_threads,
+ OutputIterator s)
{
- typedef _DifferenceTp difference_type;
- difference_type chunk_length = n / p, split = n % p, start = 0;
- for (int i = 0; i < p; i++)
+ difference_type chunk_length = n / num_threads,
+ num_longer_chunks = n % num_threads,
+ pos = 0;
+ for (thread_index_t i = 0; i < num_threads; ++i)
{
- *s++ = start;
- start += (difference_type(i) < split) ? (chunk_length + 1) : chunk_length;
+ *s++ = pos;
+ pos += (i < num_longer_chunks) ? (chunk_length + 1) : chunk_length;
}
*s++ = n;
return s;
}
+
+
+/** @brief Function to split a sequence into parts of almost equal size.
+ *
+ * Returns the position of the splitting point between
+ * thread number thread_no (included) and
+ * thread number thread_no+1 (excluded).
+ * @param n Number of elements
+ * @param num_threads Number of parts
+ * @returns Splitting point */
+template<typename difference_type>
+ difference_type
+ equally_split_point(difference_type n,
+ thread_index_t num_threads,
+ thread_index_t thread_no)
+ {
+ difference_type chunk_length = n / num_threads,
+ num_longer_chunks = n % num_threads;
+
+ if(thread_no < num_longer_chunks)
+ return thread_no * (chunk_length + 1);
+ else
+ return num_longer_chunks * (chunk_length + 1)
+ + (thread_no - num_longer_chunks) * chunk_length;
+ }
}
#endif
diff --git a/libstdc++-v3/include/parallel/features.h b/libstdc++-v3/include/parallel/features.h
index c67ffb5..a78c16b 100644
--- a/libstdc++-v3/include/parallel/features.h
+++ b/libstdc++-v3/include/parallel/features.h
@@ -66,7 +66,7 @@
* @brief Include guarded (sequences may run empty) loser tree,
* moving objects.
* @see __gnu_parallel::Settings multiway_merge_algorithm */
-#define _GLIBCXX_LOSER_TREE 0
+#define _GLIBCXX_LOSER_TREE 1
#endif
#ifndef _GLIBCXX_LOSER_TREE_EXPLICIT
diff --git a/libstdc++-v3/include/parallel/find.h b/libstdc++-v3/include/parallel/find.h
index bc89fa5..2a5b22c 100644
--- a/libstdc++-v3/include/parallel/find.h
+++ b/libstdc++-v3/include/parallel/find.h
@@ -10,7 +10,7 @@
// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURstartE. See the GNU
// General Public License for more details.
// You should have received a copy of the GNU General Public License
@@ -48,50 +48,66 @@
namespace __gnu_parallel
{
- /**
- * @brief Parallel std::find, switch for different algorithms.
- * @param begin1 Begin iterator of first sequence.
- * @param end1 End iterator of first sequence.
- * @param begin2 Begin iterator of second sequence. Must have same
- * length as first sequence.
- * @param pred Find predicate.
- * @param selector Functionality (e. g. std::find_if (), std::equal(),...)
- * @return Place of finding in both sequences.
- */
- template<typename RandomAccessIterator1, typename RandomAccessIterator2, typename Pred, typename Selector>
+/**
+ * @brief Parallel std::find, switch for different algorithms.
+ * @param begin1 Begin iterator of first sequence.
+ * @param end1 End iterator of first sequence.
+ * @param begin2 Begin iterator of second sequence. Must have same
+ * length as first sequence.
+ * @param pred Find predicate.
+ * @param selector Functionality (e. g. std::find_if (), std::equal(),...)
+ * @return Place of finding in both sequences.
+ */
+template<
+ typename RandomAccessIterator1,
+ typename RandomAccessIterator2,
+ typename Pred,
+ typename Selector>
std::pair<RandomAccessIterator1, RandomAccessIterator2>
find_template(RandomAccessIterator1 begin1, RandomAccessIterator1 end1,
- RandomAccessIterator2 begin2, Pred pred, Selector selector)
+ RandomAccessIterator2 begin2, Pred pred, Selector selector)
{
switch (Settings::find_distribution)
{
case Settings::GROWING_BLOCKS:
- return find_template(begin1, end1, begin2, pred, selector, growing_blocks_tag());
+ return find_template(begin1, end1, begin2, pred, selector,
+ growing_blocks_tag());
case Settings::CONSTANT_SIZE_BLOCKS:
- return find_template(begin1, end1, begin2, pred, selector, constant_size_blocks_tag());
+ return find_template(begin1, end1, begin2, pred, selector,
+ constant_size_blocks_tag());
case Settings::EQUAL_SPLIT:
- return find_template(begin1, end1, begin2, pred, selector, equal_split_tag());
+ return find_template(begin1, end1, begin2, pred, selector,
+ equal_split_tag());
default:
- _GLIBCXX_PARALLEL_ASSERT(false);
- return std::make_pair(begin1, begin2);
+ _GLIBCXX_PARALLEL_ASSERT(false);
+ return std::make_pair(begin1, begin2);
}
}
#if _GLIBCXX_FIND_EQUAL_SPLIT
- /**
- * @brief Parallel std::find, equal splitting variant.
- * @param begin1 Begin iterator of first sequence.
- * @param end1 End iterator of first sequence.
- * @param begin2 Begin iterator of second sequence. Second sequence
- * must have same length as first sequence.
- * @param pred Find predicate.
- * @param selector Functionality (e. g. std::find_if (), std::equal(),...)
- * @return Place of finding in both sequences.
- */
- template<typename RandomAccessIterator1, typename RandomAccessIterator2, typename Pred, typename Selector>
+/**
+ * @brief Parallel std::find, equal splitting variant.
+ * @param begin1 Begin iterator of first sequence.
+ * @param end1 End iterator of first sequence.
+ * @param begin2 Begin iterator of second sequence. Second sequence
+ * must have same length as first sequence.
+ * @param pred Find predicate.
+ * @param selector Functionality (e. g. std::find_if (), std::equal(),...)
+ * @return Place of finding in both sequences.
+ */
+template<
+ typename RandomAccessIterator1,
+ typename RandomAccessIterator2,
+ typename Pred,
+ typename Selector>
std::pair<RandomAccessIterator1, RandomAccessIterator2>
- find_template(RandomAccessIterator1 begin1, RandomAccessIterator1 end1, RandomAccessIterator2 begin2, Pred pred, Selector selector, equal_split_tag)
+ find_template(RandomAccessIterator1 begin1,
+ RandomAccessIterator1 end1,
+ RandomAccessIterator2 begin2,
+ Pred pred,
+ Selector selector,
+ equal_split_tag)
{
_GLIBCXX_CALL(end1 - begin1)
@@ -100,79 +116,89 @@ namespace __gnu_parallel
typedef typename traits_type::value_type value_type;
difference_type length = end1 - begin1;
-
difference_type result = length;
+ difference_type* borders;
- const thread_index_t num_threads = get_max_threads();
omp_lock_t result_lock;
omp_init_lock(&result_lock);
- difference_type* borders = static_cast<difference_type*>(__builtin_alloca(sizeof(difference_type) * (num_threads + 1)));
-
- equally_split(length, num_threads, borders);
-
-#pragma omp parallel shared(result) num_threads(num_threads)
- {
- int iam = omp_get_thread_num();
- difference_type pos = borders[iam], limit = borders[iam + 1];
-
- RandomAccessIterator1 i1 = begin1 + pos;
- RandomAccessIterator2 i2 = begin2 + pos;
- for (; pos < limit; pos++)
- {
-#pragma omp flush(result)
- // Result has been set to something lower.
- if (result < pos)
- break;
-
- if (selector(i1, i2, pred))
- {
- omp_set_lock(&result_lock);
- if (result > pos)
- result = pos;
- omp_unset_lock(&result_lock);
+ thread_index_t num_threads = get_max_threads();
+# pragma omp parallel num_threads(num_threads)
+ {
+# pragma omp single
+ {
+ num_threads = omp_get_num_threads();
+ borders = new difference_type[num_threads + 1];
+ equally_split(length, num_threads, borders);
+ } //single
+
+ thread_index_t iam = omp_get_thread_num();
+ difference_type start = borders[iam], stop = borders[iam + 1];
+
+ RandomAccessIterator1 i1 = begin1 + start;
+ RandomAccessIterator2 i2 = begin2 + start;
+ for (difference_type pos = start; pos < stop; ++pos)
+ {
+ #pragma omp flush(result)
+ // Result has been set to something lower.
+ if (result < pos)
break;
- }
- i1++;
- i2++;
- }
- }
+
+ if (selector(i1, i2, pred))
+ {
+ omp_set_lock(&result_lock);
+ if (pos < result)
+ result = pos;
+ omp_unset_lock(&result_lock);
+ break;
+ }
+ ++i1;
+ ++i2;
+ }
+ } //parallel
omp_destroy_lock(&result_lock);
- return std::pair<RandomAccessIterator1, RandomAccessIterator2>(begin1 + result, begin2 + result);
+ delete[] borders;
+
+ return std::pair<RandomAccessIterator1, RandomAccessIterator2>(
+ begin1 + result, begin2 + result);
}
#endif
#if _GLIBCXX_FIND_GROWING_BLOCKS
- /**
- * @brief Parallel std::find, growing block size variant.
- * @param begin1 Begin iterator of first sequence.
- * @param end1 End iterator of first sequence.
- * @param begin2 Begin iterator of second sequence. Second sequence
- * must have same length as first sequence.
- * @param pred Find predicate.
- * @param selector Functionality (e. g. std::find_if (), std::equal(),...)
- * @return Place of finding in both sequences.
- * @see __gnu_parallel::Settings::find_sequential_search_size
- * @see __gnu_parallel::Settings::find_initial_block_size
- * @see __gnu_parallel::Settings::find_maximum_block_size
- * @see __gnu_parallel::Settings::find_increasing_factor
- *
- * There are two main differences between the growing blocks and
- * the constant-size blocks variants.
- * 1. For GB, the block size grows; for CSB, the block size is fixed.
-
- * 2. For GB, the blocks are allocated dynamically;
- * for CSB, the blocks are allocated in a predetermined manner,
- * namely spacial round-robin.
- */
- template<typename RandomAccessIterator1, typename RandomAccessIterator2, typename Pred, typename Selector>
+/**
+ * @brief Parallel std::find, growing block size variant.
+ * @param begin1 Begin iterator of first sequence.
+ * @param end1 End iterator of first sequence.
+ * @param begin2 Begin iterator of second sequence. Second sequence
+ * must have same length as first sequence.
+ * @param pred Find predicate.
+ * @param selector Functionality (e. g. std::find_if (), std::equal(),...)
+ * @return Place of finding in both sequences.
+ * @see __gnu_parallel::Settings::find_sequential_search_size
+ * @see __gnu_parallel::Settings::find_initial_block_size
+ * @see __gnu_parallel::Settings::find_maximum_block_size
+ * @see __gnu_parallel::Settings::find_increasing_factor
+ *
+ * There are two main differences between the growing blocks and
+ * the constant-size blocks variants.
+ * 1. For GB, the block size grows; for CSB, the block size is fixed.
+
+ * 2. For GB, the blocks are allocated dynamically;
+ * for CSB, the blocks are allocated in a predetermined manner,
+ * namely spacial round-robin.
+ */
+template<
+ typename RandomAccessIterator1,
+ typename RandomAccessIterator2,
+ typename Pred,
+ typename Selector>
std::pair<RandomAccessIterator1, RandomAccessIterator2>
find_template(RandomAccessIterator1 begin1, RandomAccessIterator1 end1,
- RandomAccessIterator2 begin2, Pred pred, Selector selector,
- growing_blocks_tag)
+ RandomAccessIterator2 begin2, Pred pred, Selector selector,
+ growing_blocks_tag)
{
_GLIBCXX_CALL(end1 - begin1)
@@ -182,101 +208,118 @@ namespace __gnu_parallel
difference_type length = end1 - begin1;
- difference_type sequential_search_size = std::min<difference_type>(length, Settings::find_sequential_search_size);
+ difference_type sequential_search_size = std::min<difference_type>(
+ length, Settings::find_sequential_search_size);
// Try it sequentially first.
std::pair<RandomAccessIterator1, RandomAccessIterator2> find_seq_result =
- selector.sequential_algorithm(begin1, begin1 + sequential_search_size, begin2, pred);
+ selector.sequential_algorithm(
+ begin1, begin1 + sequential_search_size, begin2, pred);
if (find_seq_result.first != (begin1 + sequential_search_size))
return find_seq_result;
// Index of beginning of next free block (after sequential find).
- difference_type next_block_pos = sequential_search_size;
+ difference_type next_block_start = sequential_search_size;
difference_type result = length;
- const thread_index_t num_threads = get_max_threads();
omp_lock_t result_lock;
omp_init_lock(&result_lock);
-#pragma omp parallel shared(result) num_threads(num_threads)
- {
- // Not within first k elements -> start parallel.
- thread_index_t iam = omp_get_thread_num();
-
- difference_type block_size = Settings::find_initial_block_size;
- difference_type start = fetch_and_add<difference_type>(&next_block_pos, block_size);
-
- // Get new block, update pointer to next block.
- difference_type stop = std::min<difference_type>(length, start + block_size);
-
- std::pair<RandomAccessIterator1, RandomAccessIterator2> local_result;
-
- while (start < length)
- {
-#pragma omp flush(result)
- // Get new value of result.
- if (result < start)
- {
- // No chance to find first element.
- break;
- }
-
- local_result = selector.sequential_algorithm(begin1 + start, begin1 + stop, begin2 + start, pred);
- if (local_result.first != (begin1 + stop))
- {
- omp_set_lock(&result_lock);
- if ((local_result.first - begin1) < result)
- {
- result = local_result.first - begin1;
-
- // Result cannot be in future blocks, stop algorithm.
- fetch_and_add<difference_type>(&next_block_pos, length);
- }
- omp_unset_lock(&result_lock);
- }
-
- block_size = std::min<difference_type>(block_size * Settings::find_increasing_factor, Settings::find_maximum_block_size);
-
- // Get new block, update pointer to next block.
- start = fetch_and_add<difference_type>(&next_block_pos, block_size);
- stop = (length < (start + block_size)) ? length : (start + block_size);
- }
- }
+ thread_index_t num_threads = get_max_threads();
+# pragma omp parallel shared(result) num_threads(num_threads)
+ {
+# pragma omp single
+ num_threads = omp_get_num_threads();
+
+ // Not within first k elements -> start parallel.
+ thread_index_t iam = omp_get_thread_num();
+
+ difference_type block_size = Settings::find_initial_block_size;
+ difference_type start =
+ fetch_and_add<difference_type>(&next_block_start, block_size);
+
+ // Get new block, update pointer to next block.
+ difference_type stop =
+ std::min<difference_type>(length, start + block_size);
+
+ std::pair<RandomAccessIterator1, RandomAccessIterator2> local_result;
+
+ while (start < length)
+ {
+# pragma omp flush(result)
+ // Get new value of result.
+ if (result < start)
+ {
+ // No chance to find first element.
+ break;
+ }
+
+ local_result = selector.sequential_algorithm(
+ begin1 + start, begin1 + stop, begin2 + start, pred);
+ if (local_result.first != (begin1 + stop))
+ {
+ omp_set_lock(&result_lock);
+ if ((local_result.first - begin1) < result)
+ {
+ result = local_result.first - begin1;
+
+ // Result cannot be in future blocks, stop algorithm.
+ fetch_and_add<difference_type>(&next_block_start, length);
+ }
+ omp_unset_lock(&result_lock);
+ }
+
+ block_size = std::min<difference_type>(
+ block_size * Settings::find_increasing_factor,
+ Settings::find_maximum_block_size);
+
+ // Get new block, update pointer to next block.
+ start =
+ fetch_and_add<difference_type>(&next_block_start, block_size);
+ stop = (length < (start + block_size)) ?
+ length : (start + block_size);
+ }
+ } //parallel
omp_destroy_lock(&result_lock);
// Return iterator on found element.
- return std::pair<RandomAccessIterator1, RandomAccessIterator2>(begin1 + result, begin2 + result);
+ return std::pair<RandomAccessIterator1, RandomAccessIterator2>(
+ begin1 + result, begin2 + result);
}
#endif
#if _GLIBCXX_FIND_CONSTANT_SIZE_BLOCKS
- /**
- * @brief Parallel std::find, constant block size variant.
- * @param begin1 Begin iterator of first sequence.
- * @param end1 End iterator of first sequence.
- * @param begin2 Begin iterator of second sequence. Second sequence
- * must have same length as first sequence.
- * @param pred Find predicate.
- * @param selector Functionality (e. g. std::find_if (), std::equal(),...)
- * @return Place of finding in both sequences.
- * @see __gnu_parallel::Settings::find_sequential_search_size
- * @see __gnu_parallel::Settings::find_block_size
- * There are two main differences between the growing blocks and the
- * constant-size blocks variants.
- * 1. For GB, the block size grows; for CSB, the block size is fixed.
- * 2. For GB, the blocks are allocated dynamically; for CSB, the
- * blocks are allocated in a predetermined manner, namely spacial
- * round-robin.
- */
- template<typename RandomAccessIterator1, typename RandomAccessIterator2, typename Pred, typename Selector>
+/**
+ * @brief Parallel std::find, constant block size variant.
+ * @param begin1 Begin iterator of first sequence.
+ * @param end1 End iterator of first sequence.
+ * @param begin2 Begin iterator of second sequence. Second sequence
+ * must have same length as first sequence.
+ * @param pred Find predicate.
+ * @param selector Functionality (e. g. std::find_if (), std::equal(),...)
+ * @return Place of finding in both sequences.
+ * @see __gnu_parallel::Settings::find_sequential_search_size
+ * @see __gnu_parallel::Settings::find_block_size
+ * There are two main differences between the growing blocks and the
+ * constant-size blocks variants.
+ * 1. For GB, the block size grows; for CSB, the block size is fixed.
+ * 2. For GB, the blocks are allocated dynamically; for CSB, the
+ * blocks are allocated in a predetermined manner, namely spacial
+ * round-robin.
+ */
+template<
+ typename RandomAccessIterator1,
+ typename RandomAccessIterator2,
+ typename Pred,
+ typename Selector>
std::pair<RandomAccessIterator1, RandomAccessIterator2>
find_template(RandomAccessIterator1 begin1, RandomAccessIterator1 end1,
- RandomAccessIterator2 begin2, Pred pred, Selector selector,
- constant_size_blocks_tag)
+ RandomAccessIterator2 begin2, Pred pred, Selector selector,
+ constant_size_blocks_tag)
{
_GLIBCXX_CALL(end1 - begin1)
typedef std::iterator_traits<RandomAccessIterator1> traits_type;
@@ -285,72 +328,77 @@ namespace __gnu_parallel
difference_type length = end1 - begin1;
- difference_type sequential_search_size = std::min<difference_type>(length, Settings::find_sequential_search_size);
+ difference_type sequential_search_size = std::min<difference_type>(
+ length, Settings::find_sequential_search_size);
// Try it sequentially first.
std::pair<RandomAccessIterator1, RandomAccessIterator2> find_seq_result =
- selector.sequential_algorithm(begin1, begin1 + sequential_search_size, begin2, pred);
+ selector.sequential_algorithm(begin1, begin1 + sequential_search_size,
+ begin2, pred);
if (find_seq_result.first != (begin1 + sequential_search_size))
return find_seq_result;
difference_type result = length;
- const thread_index_t num_threads = get_max_threads();
-
omp_lock_t result_lock;
omp_init_lock(&result_lock);
// Not within first sequential_search_size elements -> start parallel.
-#pragma omp parallel shared(result) num_threads(num_threads)
- {
- thread_index_t iam = omp_get_thread_num();
- difference_type block_size = Settings::find_initial_block_size;
-
- difference_type start, stop;
-
- // First element of thread's current iteration.
- difference_type iteration_start = sequential_search_size;
-
- // Where to work (initialization).
- start = iteration_start + iam * block_size;
- stop = std::min<difference_type>(length, start + block_size);
-
- std::pair<RandomAccessIterator1, RandomAccessIterator2> local_result;
-
- while (start < length)
- {
- // Get new value of result.
-#pragma omp flush(result)
- // No chance to find first element.
- if (result < start)
- break;
-
- local_result = selector.sequential_algorithm(begin1 + start, begin1 + stop, begin2 + start, pred);
- if (local_result.first != (begin1 + stop))
- {
- omp_set_lock(&result_lock);
- if ((local_result.first - begin1) < result)
- result = local_result.first - begin1;
- omp_unset_lock(&result_lock);
- // Will not find better value in its interval.
- break;
- }
-
- iteration_start += num_threads * block_size;
-
- // Where to work.
- start = iteration_start + iam * block_size;
- stop = std::min<difference_type>(length, start + block_size);
- }
- }
+
+ thread_index_t num_threads = get_max_threads();
+# pragma omp parallel shared(result) num_threads(num_threads)
+ {
+# pragma omp single
+ num_threads = omp_get_num_threads();
+
+ thread_index_t iam = omp_get_thread_num();
+ difference_type block_size = Settings::find_initial_block_size;
+
+ // First element of thread's current iteration.
+ difference_type iteration_start = sequential_search_size;
+
+ // Where to work (initialization).
+ difference_type start = iteration_start + iam * block_size;
+ difference_type stop =
+ std::min<difference_type>(length, start + block_size);
+
+ std::pair<RandomAccessIterator1, RandomAccessIterator2> local_result;
+
+ while (start < length)
+ {
+ // Get new value of result.
+# pragma omp flush(result)
+ // No chance to find first element.
+ if (result < start)
+ break;
+ local_result = selector.sequential_algorithm(
+ begin1 + start, begin1 + stop,
+ begin2 + start, pred);
+ if (local_result.first != (begin1 + stop))
+ {
+ omp_set_lock(&result_lock);
+ if ((local_result.first - begin1) < result)
+ result = local_result.first - begin1;
+ omp_unset_lock(&result_lock);
+ // Will not find better value in its interval.
+ break;
+ }
+
+ iteration_start += num_threads * block_size;
+
+ // Where to work.
+ start = iteration_start + iam * block_size;
+ stop = std::min<difference_type>(length, start + block_size);
+ }
+ } //parallel
omp_destroy_lock(&result_lock);
// Return iterator on found element.
- return std::pair<RandomAccessIterator1, RandomAccessIterator2>(begin1 + result, begin2 + result);
+ return std::pair<RandomAccessIterator1, RandomAccessIterator2>(
+ begin1 + result, begin2 + result);
}
#endif
} // end namespace
#endif
-
diff --git a/libstdc++-v3/include/parallel/losertree.h b/libstdc++-v3/include/parallel/losertree.h
index 5c7a808..7b8b654 100644
--- a/libstdc++-v3/include/parallel/losertree.h
+++ b/libstdc++-v3/include/parallel/losertree.h
@@ -29,9 +29,9 @@
// Public License.
/** @file parallel/losertree.h
- * @brief Many generic loser tree variants.
- * This file is a GNU parallel extension to the Standard C++ Library.
- */
+* @brief Many generic loser tree variants.
+* This file is a GNU parallel extension to the Standard C++ Library.
+*/
// Written by Johannes Singler.
@@ -49,13 +49,13 @@ namespace __gnu_parallel
#if _GLIBCXX_LOSER_TREE_EXPLICIT
- /** @brief Guarded loser tree, copying the whole element into the
- * tree structure.
- *
- * Guarding is done explicitly through two flags per element, inf
- * and sup This is a quite slow variant.
- */
- template<typename T, typename Comparator = std::less<T> >
+/** @brief Guarded loser tree, copying the whole element into the
+* tree structure.
+*
+* Guarding is done explicitly through two flags per element, inf
+* and sup This is a quite slow variant.
+*/
+template<typename T, typename Comparator = std::less<T> >
class LoserTreeExplicit
{
private:
@@ -76,26 +76,25 @@ namespace __gnu_parallel
Comparator comp;
public:
- inline LoserTreeExplicit(unsigned int _size, Comparator _comp = std::less<T>()) : comp(_comp)
+ inline
+ LoserTreeExplicit(unsigned int _size, Comparator _comp = std::less<T>())
+ : comp(_comp)
{
size = _size;
offset = size;
losers = new Loser[size];
for (unsigned int l = 0; l < size; l++)
- {
- //losers[l].key = ... stays unset
- losers[l].inf = true;
- losers[l].sup = false;
- //losers[l].source = -1; //sentinel
- }
+ {
+ //losers[l].key = ... stays unset
+ losers[l].inf = true;
+ losers[l].sup = false;
+ //losers[l].source = -1; //sentinel
+ }
}
inline ~LoserTreeExplicit()
{ delete[] losers; }
- inline void
- print() { }
-
inline int
get_min_source()
{ return losers[0].source; }
@@ -105,16 +104,17 @@ namespace __gnu_parallel
{
bool inf = false;
for (unsigned int pos = (offset + source) / 2; pos > 0; pos /= 2)
- {
- if ((!inf && !losers[pos].inf && !sup && !losers[pos].sup && comp(losers[pos].key, key)) || losers[pos].inf || sup)
- {
- // The other one is smaller.
- std::swap(losers[pos].key, key);
- std::swap(losers[pos].inf, inf);
- std::swap(losers[pos].sup, sup);
- std::swap(losers[pos].source, source);
- }
- }
+ {
+ if ((!inf && !losers[pos].inf && !sup && !losers[pos].sup
+ && comp(losers[pos].key, key)) || losers[pos].inf || sup)
+ {
+ // The other one is smaller.
+ std::swap(losers[pos].key, key);
+ std::swap(losers[pos].inf, inf);
+ std::swap(losers[pos].sup, sup);
+ std::swap(losers[pos].source, source);
+ }
+ }
losers[0].key = key;
losers[0].inf = inf;
@@ -131,19 +131,19 @@ namespace __gnu_parallel
bool inf = false;
int source = losers[0].source;
for (unsigned int pos = (offset + source) / 2; pos > 0; pos /= 2)
- {
- // The smaller one gets promoted.
- if ((!inf && !losers[pos].inf && !sup && !losers[pos].sup
- && comp(losers[pos].key, key))
- || losers[pos].inf || sup)
- {
- // The other one is smaller.
- std::swap(losers[pos].key, key);
- std::swap(losers[pos].inf, inf);
- std::swap(losers[pos].sup, sup);
- std::swap(losers[pos].source, source);
- }
- }
+ {
+ // The smaller one gets promoted.
+ if ((!inf && !losers[pos].inf && !sup && !losers[pos].sup
+ && comp(losers[pos].key, key))
+ || losers[pos].inf || sup)
+ {
+ // The other one is smaller.
+ std::swap(losers[pos].key, key);
+ std::swap(losers[pos].inf, inf);
+ std::swap(losers[pos].sup, sup);
+ std::swap(losers[pos].source, source);
+ }
+ }
losers[0].key = key;
losers[0].inf = inf;
@@ -156,19 +156,19 @@ namespace __gnu_parallel
{
bool inf = false;
for (unsigned int pos = (offset + source) / 2; pos > 0; pos /= 2)
- {
- if ((!inf && !losers[pos].inf && !sup && !losers[pos].sup &&
- ((comp(losers[pos].key, key)) ||
- (!comp(key, losers[pos].key) && losers[pos].source < source)))
- || losers[pos].inf || sup)
- {
- // Take next key.
- std::swap(losers[pos].key, key);
- std::swap(losers[pos].inf, inf);
- std::swap(losers[pos].sup, sup);
- std::swap(losers[pos].source, source);
- }
- }
+ {
+ if ((!inf && !losers[pos].inf && !sup && !losers[pos].sup &&
+ ((comp(losers[pos].key, key)) ||
+ (!comp(key, losers[pos].key) && losers[pos].source < source)))
+ || losers[pos].inf || sup)
+ {
+ // Take next key.
+ std::swap(losers[pos].key, key);
+ std::swap(losers[pos].inf, inf);
+ std::swap(losers[pos].sup, sup);
+ std::swap(losers[pos].source, source);
+ }
+ }
losers[0].key = key;
losers[0].inf = inf;
@@ -185,18 +185,18 @@ namespace __gnu_parallel
bool inf = false;
int source = losers[0].source;
for (unsigned int pos = (offset + source) / 2; pos > 0; pos /= 2)
- {
- if ((!inf && !losers[pos].inf && !sup && !losers[pos].sup
- && ((comp(losers[pos].key, key)) ||
- (!comp(key, losers[pos].key) && losers[pos].source < source)))
- || losers[pos].inf || sup)
- {
- std::swap(losers[pos].key, key);
- std::swap(losers[pos].inf, inf);
- std::swap(losers[pos].sup, sup);
- std::swap(losers[pos].source, source);
- }
- }
+ {
+ if ((!inf && !losers[pos].inf && !sup && !losers[pos].sup
+ && ((comp(losers[pos].key, key)) ||
+ (!comp(key, losers[pos].key) && losers[pos].source < source)))
+ || losers[pos].inf || sup)
+ {
+ std::swap(losers[pos].key, key);
+ std::swap(losers[pos].inf, inf);
+ std::swap(losers[pos].sup, sup);
+ std::swap(losers[pos].source, source);
+ }
+ }
losers[0].key = key;
losers[0].inf = inf;
@@ -209,14 +209,14 @@ namespace __gnu_parallel
#if _GLIBCXX_LOSER_TREE
- /** @brief Guarded loser tree, either copying the whole element into
- * the tree structure, or looking up the element via the index.
- *
- * Guarding is done explicitly through one flag sup per element,
- * inf is not needed due to a better initialization routine. This
- * is a well-performing variant.
- */
- template<typename T, typename Comparator = std::less<T> >
+/** @brief Guarded loser tree, either copying the whole element into
+* the tree structure, or looking up the element via the index.
+*
+* Guarding is done explicitly through one flag sup per element,
+* inf is not needed due to a better initialization routine. This
+* is a well-performing variant.
+*/
+template<typename T, typename Comparator = std::less<T> >
class LoserTree
{
private:
@@ -240,22 +240,14 @@ namespace __gnu_parallel
// Next greater power of 2.
k = 1 << (log2(ik - 1) + 1);
offset = k;
- losers = new Loser[k * 2];
+ losers = static_cast<Loser*>(::operator new(k * 2 * sizeof(Loser)));
for (unsigned int i = ik - 1; i < k; i++)
- losers[i + k].sup = true;
+ losers[i + k].sup = true;
}
inline ~LoserTree()
{ delete[] losers; }
- void
- print()
- {
- for (unsigned int i = 0; i < (k * 2); i++)
- printf("%d %d from %d, %d\n", i, losers[i].key,
- losers[i].source, losers[i].sup);
- }
-
inline int
get_min_source()
{ return losers[0].source; }
@@ -267,33 +259,34 @@ namespace __gnu_parallel
losers[pos].sup = sup;
losers[pos].source = source;
- losers[pos].key = key;
+ new(&(losers[pos].key)) T(key);
}
unsigned int
init_winner (unsigned int root)
{
if (root >= k)
- {
- return root;
- }
+ {
+ return root;
+ }
else
- {
- unsigned int left = init_winner (2 * root);
- unsigned int right = init_winner (2 * root + 1);
- if (losers[right].sup ||
- (!losers[left].sup && !comp(losers[right].key, losers[left].key)))
- {
- // Left one is less or equal.
- losers[root] = losers[right];
- return left;
- }
- else
- { // Right one is less.
- losers[root] = losers[left];
- return right;
- }
- }
+ {
+ unsigned int left = init_winner (2 * root);
+ unsigned int right = init_winner (2 * root + 1);
+ if (losers[right].sup ||
+ (!losers[left].sup
+ && !comp(losers[right].key, losers[left].key)))
+ {
+ // Left one is less or equal.
+ losers[root] = losers[right];
+ return left;
+ }
+ else
+ { // Right one is less.
+ losers[root] = losers[left];
+ return right;
+ }
+ }
}
inline void
@@ -306,16 +299,16 @@ namespace __gnu_parallel
{
int source = losers[0].source;
for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
- {
- // The smaller one gets promoted.
- if (sup || (!losers[pos].sup && comp(losers[pos].key, key)))
- {
- // The other one is smaller.
- std::swap(losers[pos].sup, sup);
- std::swap(losers[pos].source, source);
- std::swap(losers[pos].key, key);
- }
- }
+ {
+ // The smaller one gets promoted.
+ if (sup || (!losers[pos].sup && comp(losers[pos].key, key)))
+ {
+ // The other one is smaller.
+ std::swap(losers[pos].sup, sup);
+ std::swap(losers[pos].source, source);
+ std::swap(losers[pos].key, key);
+ }
+ }
losers[0].sup = sup;
losers[0].source = source;
@@ -330,27 +323,28 @@ namespace __gnu_parallel
init_winner_stable (unsigned int root)
{
if (root >= k)
- {
- return root;
- }
+ {
+ return root;
+ }
else
- {
- unsigned int left = init_winner (2 * root);
- unsigned int right = init_winner (2 * root + 1);
- if ( losers[right].sup ||
- (!losers[left].sup && !comp(losers[right].key, losers[left].key)))
- {
- // Left one is less or equal.
- losers[root] = losers[right];
- return left;
- }
- else
- {
- // Right one is less.
- losers[root] = losers[left];
- return right;
- }
- }
+ {
+ unsigned int left = init_winner (2 * root);
+ unsigned int right = init_winner (2 * root + 1);
+ if (losers[right].sup
+ || (!losers[left].sup
+ && !comp(losers[right].key, losers[left].key)))
+ {
+ // Left one is less or equal.
+ losers[root] = losers[right];
+ return left;
+ }
+ else
+ {
+ // Right one is less.
+ losers[root] = losers[left];
+ return right;
+ }
+ }
}
inline void
@@ -363,19 +357,20 @@ namespace __gnu_parallel
{
int source = losers[0].source;
for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
- {
- // The smaller one gets promoted, ties are broken by source.
- if ( (sup && (!losers[pos].sup || losers[pos].source < source)) ||
- (!sup && !losers[pos].sup &&
- ((comp(losers[pos].key, key)) ||
- (!comp(key, losers[pos].key) && losers[pos].source < source))))
- {
- // The other one is smaller.
- std::swap(losers[pos].sup, sup);
- std::swap(losers[pos].source, source);
- std::swap(losers[pos].key, key);
- }
- }
+ {
+ // The smaller one gets promoted, ties are broken by source.
+ if ( (sup && (!losers[pos].sup || losers[pos].source < source))
+ || (!sup && !losers[pos].sup
+ && ((comp(losers[pos].key, key))
+ || (!comp(key, losers[pos].key)
+ && losers[pos].source < source))))
+ {
+ // The other one is smaller.
+ std::swap(losers[pos].sup, sup);
+ std::swap(losers[pos].source, source);
+ std::swap(losers[pos].key, key);
+ }
+ }
losers[0].sup = sup;
losers[0].source = source;
@@ -387,14 +382,14 @@ namespace __gnu_parallel
#if _GLIBCXX_LOSER_TREE_REFERENCE
- /** @brief Guarded loser tree, either copying the whole element into
- * the tree structure, or looking up the element via the index.
- *
- * Guarding is done explicitly through one flag sup per element,
- * inf is not needed due to a better initialization routine. This
- * is a well-performing variant.
- */
- template<typename T, typename Comparator = std::less<T> >
+/** @brief Guarded loser tree, either copying the whole element into
+* the tree structure, or looking up the element via the index.
+*
+* Guarding is done explicitly through one flag sup per element,
+* inf is not needed due to a better initialization routine. This
+* is a well-performing variant.
+*/
+template<typename T, typename Comparator = std::less<T> >
class LoserTreeReference
{
#undef COPY
@@ -423,7 +418,9 @@ namespace __gnu_parallel
Comparator comp;
public:
- inline LoserTreeReference(unsigned int _k, Comparator _comp = std::less<T>()) : comp(_comp)
+ inline
+ LoserTreeReference(unsigned int _k, Comparator _comp = std::less<T>())
+ : comp(_comp)
{
ik = _k;
@@ -435,7 +432,7 @@ namespace __gnu_parallel
keys = new T[ik];
#endif
for (unsigned int i = ik - 1; i < k; i++)
- losers[i + k].sup = true;
+ losers[i + k].sup = true;
}
inline ~LoserTreeReference()
@@ -446,13 +443,6 @@ namespace __gnu_parallel
#endif
}
- void
- print()
- {
- for (unsigned int i = 0; i < (k * 2); i++)
- printf("%d %d from %d, %d\n", i, KEY(i), losers[i].source, losers[i].sup);
- }
-
inline int
get_min_source()
{ return losers[0].source; }
@@ -471,27 +461,27 @@ namespace __gnu_parallel
init_winner(unsigned int root)
{
if (root >= k)
- {
- return root;
- }
+ {
+ return root;
+ }
else
- {
- unsigned int left = init_winner (2 * root);
- unsigned int right = init_winner (2 * root + 1);
- if ( losers[right].sup ||
- (!losers[left].sup && !comp(KEY(right), KEY(left))))
- {
- // Left one is less or equal.
- losers[root] = losers[right];
- return left;
- }
- else
- {
- // Right one is less.
- losers[root] = losers[left];
- return right;
- }
- }
+ {
+ unsigned int left = init_winner (2 * root);
+ unsigned int right = init_winner (2 * root + 1);
+ if ( losers[right].sup ||
+ (!losers[left].sup && !comp(KEY(right), KEY(left))))
+ {
+ // Left one is less or equal.
+ losers[root] = losers[right];
+ return left;
+ }
+ else
+ {
+ // Right one is less.
+ losers[root] = losers[left];
+ return right;
+ }
+ }
}
inline void
@@ -505,18 +495,18 @@ namespace __gnu_parallel
{
int source = losers[0].source;
for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
- {
- // The smaller one gets promoted.
- if (sup || (!losers[pos].sup && comp(KEY(pos), KEY_SOURCE(source))))
- {
- // The other one is smaller.
- std::swap(losers[pos].sup, sup);
- std::swap(losers[pos].source, source);
+ {
+ // The smaller one gets promoted.
+ if (sup || (!losers[pos].sup && comp(KEY(pos), KEY_SOURCE(source))))
+ {
+ // The other one is smaller.
+ std::swap(losers[pos].sup, sup);
+ std::swap(losers[pos].source, source);
#ifdef COPY
- std::swap(KEY(pos), KEY_SOURCE(source));
+ std::swap(KEY(pos), KEY_SOURCE(source));
#endif
- }
- }
+ }
+ }
losers[0].sup = sup;
losers[0].source = source;
@@ -533,27 +523,27 @@ namespace __gnu_parallel
init_winner_stable(unsigned int root)
{
if (root >= k)
- {
- return root;
- }
+ {
+ return root;
+ }
else
- {
- unsigned int left = init_winner (2 * root);
- unsigned int right = init_winner (2 * root + 1);
- if (losers[right].sup
- || (!losers[left].sup && !comp(KEY(right), KEY(left))))
- {
- // Left one is less or equal.
- losers[root] = losers[right];
- return left;
- }
- else
- {
- // Right one is less.
- losers[root] = losers[left];
- return right;
- }
- }
+ {
+ unsigned int left = init_winner (2 * root);
+ unsigned int right = init_winner (2 * root + 1);
+ if (losers[right].sup
+ || (!losers[left].sup && !comp(KEY(right), KEY(left))))
+ {
+ // Left one is less or equal.
+ losers[root] = losers[right];
+ return left;
+ }
+ else
+ {
+ // Right one is less.
+ losers[root] = losers[left];
+ return right;
+ }
+ }
}
inline void
@@ -565,21 +555,22 @@ namespace __gnu_parallel
{
int source = losers[0].source;
for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
- {
- // The smaller one gets promoted, ties are broken by source.
- if ( (sup && (!losers[pos].sup || losers[pos].source < source)) ||
- (!sup && !losers[pos].sup &&
- ((comp(KEY(pos), KEY_SOURCE(source))) ||
- (!comp(KEY_SOURCE(source), KEY(pos)) && losers[pos].source < source))))
- {
- // The other one is smaller.
- std::swap(losers[pos].sup, sup);
- std::swap(losers[pos].source, source);
+ {
+ // The smaller one gets promoted, ties are broken by source.
+ if ( (sup && (!losers[pos].sup || losers[pos].source < source)) ||
+ (!sup && !losers[pos].sup &&
+ ((comp(KEY(pos), KEY_SOURCE(source))) ||
+ (!comp(KEY_SOURCE(source), KEY(pos))
+ && losers[pos].source < source))))
+ {
+ // The other one is smaller.
+ std::swap(losers[pos].sup, sup);
+ std::swap(losers[pos].source, source);
#ifdef COPY
- std::swap(KEY(pos), KEY_SOURCE(source));
+ std::swap(KEY(pos), KEY_SOURCE(source));
#endif
- }
- }
+ }
+ }
losers[0].sup = sup;
losers[0].source = source;
@@ -595,13 +586,13 @@ namespace __gnu_parallel
#if _GLIBCXX_LOSER_TREE_POINTER
- /** @brief Guarded loser tree, either copying the whole element into
- the tree structure, or looking up the element via the index.
- * Guarding is done explicitly through one flag sup per element,
- * inf is not needed due to a better initialization routine.
- * This is a well-performing variant.
- */
- template<typename T, typename Comparator = std::less<T> >
+/** @brief Guarded loser tree, either copying the whole element into
+ the tree structure, or looking up the element via the index.
+* Guarding is done explicitly through one flag sup per element,
+* inf is not needed due to a better initialization routine.
+* This is a well-performing variant.
+*/
+template<typename T, typename Comparator = std::less<T> >
class LoserTreePointer
{
private:
@@ -617,7 +608,9 @@ namespace __gnu_parallel
Comparator comp;
public:
- inline LoserTreePointer(unsigned int _k, Comparator _comp = std::less<T>()) : comp(_comp)
+ inline
+ LoserTreePointer(unsigned int _k, Comparator _comp = std::less<T>())
+ : comp(_comp)
{
ik = _k;
@@ -626,19 +619,12 @@ namespace __gnu_parallel
offset = k;
losers = new Loser[k * 2];
for (unsigned int i = ik - 1; i < k; i++)
- losers[i + k].sup = true;
+ losers[i + k].sup = true;
}
inline ~LoserTreePointer()
{ delete[] losers; }
- void
- print()
- {
- for (unsigned int i = 0; i < (k * 2); i++)
- printf("%d %d from %d, %d\n", i, losers[i].keyp, losers[i].source, losers[i].sup);
- }
-
inline int
get_min_source()
{ return losers[0].source; }
@@ -657,49 +643,50 @@ namespace __gnu_parallel
init_winner(unsigned int root)
{
if (root >= k)
- {
- return root;
- }
+ {
+ return root;
+ }
else
- {
- unsigned int left = init_winner (2 * root);
- unsigned int right = init_winner (2 * root + 1);
- if ( losers[right].sup ||
- (!losers[left].sup && !comp(*losers[right].keyp, *losers[left].keyp)))
- {
- // Left one is less or equal.
- losers[root] = losers[right];
- return left;
- }
- else
- {
- // Right one is less.
- losers[root] = losers[left];
- return right;
- }
- }
+ {
+ unsigned int left = init_winner (2 * root);
+ unsigned int right = init_winner (2 * root + 1);
+ if (losers[right].sup
+ || (!losers[left].sup
+ && !comp(*losers[right].keyp, *losers[left].keyp)))
+ {
+ // Left one is less or equal.
+ losers[root] = losers[right];
+ return left;
+ }
+ else
+ {
+ // Right one is less.
+ losers[root] = losers[left];
+ return right;
+ }
+ }
}
inline void
init()
{ losers[0] = losers[init_winner(1)]; }
- inline void
+ inline void
delete_min_insert(const T& key, bool sup)
{
const T* keyp = &key;
int source = losers[0].source;
for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
- {
- // The smaller one gets promoted.
- if (sup || (!losers[pos].sup && comp(*losers[pos].keyp, *keyp)))
- {
- // The other one is smaller.
- std::swap(losers[pos].sup, sup);
- std::swap(losers[pos].source, source);
- std::swap(losers[pos].keyp, keyp);
- }
- }
+ {
+ // The smaller one gets promoted.
+ if (sup || (!losers[pos].sup && comp(*losers[pos].keyp, *keyp)))
+ {
+ // The other one is smaller.
+ std::swap(losers[pos].sup, sup);
+ std::swap(losers[pos].source, source);
+ std::swap(losers[pos].keyp, keyp);
+ }
+ }
losers[0].sup = sup;
losers[0].source = source;
@@ -714,28 +701,28 @@ namespace __gnu_parallel
init_winner_stable(unsigned int root)
{
if (root >= k)
- {
- return root;
- }
+ {
+ return root;
+ }
else
- {
- unsigned int left = init_winner (2 * root);
- unsigned int right = init_winner (2 * root + 1);
- if (losers[right].sup
- || (!losers[left].sup && !comp(*losers[right].keyp,
- *losers[left].keyp)))
- {
- // Left one is less or equal.
- losers[root] = losers[right];
- return left;
- }
- else
- {
- // Right one is less.
- losers[root] = losers[left];
- return right;
- }
- }
+ {
+ unsigned int left = init_winner (2 * root);
+ unsigned int right = init_winner (2 * root + 1);
+ if (losers[right].sup
+ || (!losers[left].sup && !comp(*losers[right].keyp,
+ *losers[left].keyp)))
+ {
+ // Left one is less or equal.
+ losers[root] = losers[right];
+ return left;
+ }
+ else
+ {
+ // Right one is less.
+ losers[root] = losers[left];
+ return right;
+ }
+ }
}
inline void
@@ -748,20 +735,20 @@ namespace __gnu_parallel
const T* keyp = &key;
int source = losers[0].source;
for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
- {
- // The smaller one gets promoted, ties are broken by source.
- if ( (sup && (!losers[pos].sup || losers[pos].source < source)) ||
- (!sup && !losers[pos].sup &&
- ((comp(*losers[pos].keyp, *keyp)) ||
- (!comp(*keyp, *losers[pos].keyp)
- && losers[pos].source < source))))
- {
- // The other one is smaller.
- std::swap(losers[pos].sup, sup);
- std::swap(losers[pos].source, source);
- std::swap(losers[pos].keyp, keyp);
- }
- }
+ {
+ // The smaller one gets promoted, ties are broken by source.
+ if ( (sup && (!losers[pos].sup || losers[pos].source < source)) ||
+ (!sup && !losers[pos].sup &&
+ ((comp(*losers[pos].keyp, *keyp)) ||
+ (!comp(*keyp, *losers[pos].keyp)
+ && losers[pos].source < source))))
+ {
+ // The other one is smaller.
+ std::swap(losers[pos].sup, sup);
+ std::swap(losers[pos].source, source);
+ std::swap(losers[pos].keyp, keyp);
+ }
+ }
losers[0].sup = sup;
losers[0].source = source;
@@ -773,13 +760,13 @@ namespace __gnu_parallel
#if _GLIBCXX_LOSER_TREE_UNGUARDED
- /** @brief Unguarded loser tree, copying the whole element into the
- * tree structure.
- *
- * No guarding is done, therefore not a single input sequence must
- * run empty. This is a very fast variant.
- */
- template<typename T, typename Comparator = std::less<T> >
+/** @brief Unguarded loser tree, copying the whole element into the
+* tree structure.
+*
+* No guarding is done, therefore not a single input sequence must
+* run empty. This is a very fast variant.
+*/
+template<typename T, typename Comparator = std::less<T> >
class LoserTreeUnguarded
{
private:
@@ -798,18 +785,20 @@ namespace __gnu_parallel
map(unsigned int root, unsigned int begin, unsigned int end)
{
if (begin + 1 == end)
- mapping[begin] = root;
+ mapping[begin] = root;
else
- {
- // Next greater or equal power of 2.
- unsigned int left = 1 << (log2(end - begin - 1));
- map(root * 2, begin, begin + left);
- map(root * 2 + 1, begin + left, end);
- }
+ {
+ // Next greater or equal power of 2.
+ unsigned int left = 1 << (log2(end - begin - 1));
+ map(root * 2, begin, begin + left);
+ map(root * 2 + 1, begin + left, end);
+ }
}
public:
- inline LoserTreeUnguarded(unsigned int _k, Comparator _comp = std::less<T>()) : comp(_comp)
+ inline
+ LoserTreeUnguarded(unsigned int _k, Comparator _comp = std::less<T>())
+ : comp(_comp)
{
ik = _k;
// Next greater or equal power of 2.
@@ -826,13 +815,6 @@ namespace __gnu_parallel
delete[] mapping;
}
- void
- print()
- {
- for (unsigned int i = 0; i < k + ik; i++)
- printf("%d %d from %d\n", i, losers[i].key, losers[i].source);
- }
-
inline int
get_min_source()
{ return losers[0].source; }
@@ -849,26 +831,27 @@ namespace __gnu_parallel
init_winner(unsigned int root, unsigned int begin, unsigned int end)
{
if (begin + 1 == end)
- return mapping[begin];
+ return mapping[begin];
else
- {
- // Next greater or equal power of 2.
- unsigned int division = 1 << (log2(end - begin - 1));
- unsigned int left = init_winner(2 * root, begin, begin + division);
- unsigned int right = init_winner(2 * root + 1, begin + division, end);
- if (!comp(losers[right].key, losers[left].key))
- {
- // Left one is less or equal.
- losers[root] = losers[right];
- return left;
- }
- else
- {
- // Right one is less.
- losers[root] = losers[left];
- return right;
- }
- }
+ {
+ // Next greater or equal power of 2.
+ unsigned int division = 1 << (log2(end - begin - 1));
+ unsigned int left = init_winner(2 * root, begin, begin + division);
+ unsigned int right =
+ init_winner(2 * root + 1, begin + division, end);
+ if (!comp(losers[right].key, losers[left].key))
+ {
+ // Left one is less or equal.
+ losers[root] = losers[right];
+ return left;
+ }
+ else
+ {
+ // Right one is less.
+ losers[root] = losers[left];
+ return right;
+ }
+ }
}
inline void
@@ -883,15 +866,15 @@ namespace __gnu_parallel
T& keyr = losers[0].key;
int& source = losers[0].source;
for (int pos = mapping[source] / 2; pos > 0; pos /= 2)
- {
- // The smaller one gets promoted.
- if (comp(losers[pos].key, keyr))
- {
- // The other one is smaller.
- std::swap(losers[pos].source, source);
- std::swap(losers[pos].key, keyr);
- }
- }
+ {
+ // The smaller one gets promoted.
+ if (comp(losers[pos].key, keyr))
+ {
+ // The other one is smaller.
+ std::swap(losers[pos].source, source);
+ std::swap(losers[pos].key, keyr);
+ }
+ }
}
inline void
@@ -909,16 +892,17 @@ namespace __gnu_parallel
T& keyr = losers[0].key;
int& source = losers[0].source;
for (int pos = mapping[source] / 2; pos > 0; pos /= 2)
- {
- // The smaller one gets promoted, ties are broken by source.
- if (comp(losers[pos].key, keyr)
- || (!comp(keyr, losers[pos].key) && losers[pos].source < source))
- {
- // The other one is smaller.
- std::swap(losers[pos].source, source);
- std::swap(losers[pos].key, keyr);
- }
- }
+ {
+ // The smaller one gets promoted, ties are broken by source.
+ if (comp(losers[pos].key, keyr)
+ || (!comp(keyr, losers[pos].key)
+ && losers[pos].source < source))
+ {
+ // The other one is smaller.
+ std::swap(losers[pos].source, source);
+ std::swap(losers[pos].key, keyr);
+ }
+ }
}
};
@@ -926,13 +910,13 @@ namespace __gnu_parallel
#if _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED
- /** @brief Unguarded loser tree, keeping only pointers to the
- * elements in the tree structure.
- *
- * No guarding is done, therefore not a single input sequence must
- * run empty. This is a very fast variant.
- */
- template<typename T, typename Comparator = std::less<T> >
+/** @brief Unguarded loser tree, keeping only pointers to the
+* elements in the tree structure.
+*
+* No guarding is done, therefore not a single input sequence must
+* run empty. This is a very fast variant.
+*/
+template<typename T, typename Comparator = std::less<T> >
class LoserTreePointerUnguarded
{
private:
@@ -950,18 +934,21 @@ namespace __gnu_parallel
void map(unsigned int root, unsigned int begin, unsigned int end)
{
if (begin + 1 == end)
- mapping[begin] = root;
+ mapping[begin] = root;
else
- {
- // Next greater or equal power of 2.
- unsigned int left = 1 << (log2(end - begin - 1));
- map(root * 2, begin, begin + left);
- map(root * 2 + 1, begin + left, end);
- }
+ {
+ // Next greater or equal power of 2.
+ unsigned int left = 1 << (log2(end - begin - 1));
+ map(root * 2, begin, begin + left);
+ map(root * 2 + 1, begin + left, end);
+ }
}
public:
- inline LoserTreePointerUnguarded(unsigned int _k, Comparator _comp = std::less<T>()) : comp(_comp)
+ inline
+ LoserTreePointerUnguarded(unsigned int _k,
+ Comparator _comp = std::less<T>())
+ : comp(_comp)
{
ik = _k;
@@ -979,13 +966,6 @@ namespace __gnu_parallel
delete[] mapping;
}
- void
- print()
- {
- for (unsigned int i = 0; i < k + ik; i++)
- printf("%d %d from %d\n", i, *losers[i].keyp, losers[i].source);
- }
-
inline int
get_min_source()
{ return losers[0].source; }
@@ -1002,26 +982,27 @@ namespace __gnu_parallel
init_winner(unsigned int root, unsigned int begin, unsigned int end)
{
if (begin + 1 == end)
- return mapping[begin];
+ return mapping[begin];
else
- {
- // Next greater or equal power of 2.
- unsigned int division = 1 << (log2(end - begin - 1));
- unsigned int left = init_winner(2 * root, begin, begin + division);
- unsigned int right = init_winner(2 * root + 1, begin + division, end);
- if (!comp(*losers[right].keyp, *losers[left].keyp))
- {
- // Left one is less or equal.
- losers[root] = losers[right];
- return left;
- }
- else
- {
- // Right one is less.
- losers[root] = losers[left];
- return right;
- }
- }
+ {
+ // Next greater or equal power of 2.
+ unsigned int division = 1 << (log2(end - begin - 1));
+ unsigned int left = init_winner(2 * root, begin, begin + division);
+ unsigned int right
+ = init_winner(2 * root + 1, begin + division, end);
+ if (!comp(*losers[right].keyp, *losers[left].keyp))
+ {
+ // Left one is less or equal.
+ losers[root] = losers[right];
+ return left;
+ }
+ else
+ {
+ // Right one is less.
+ losers[root] = losers[left];
+ return right;
+ }
+ }
}
inline void
@@ -1036,15 +1017,15 @@ namespace __gnu_parallel
const T* keyp = &key;
int& source = losers[0].source;
for (int pos = mapping[source] / 2; pos > 0; pos /= 2)
- {
- // The smaller one gets promoted.
- if (comp(*losers[pos].keyp, *keyp))
- {
- // The other one is smaller.
- std::swap(losers[pos].source, source);
- std::swap(losers[pos].keyp, keyp);
- }
- }
+ {
+ // The smaller one gets promoted.
+ if (comp(*losers[pos].keyp, *keyp))
+ {
+ // The other one is smaller.
+ std::swap(losers[pos].source, source);
+ std::swap(losers[pos].keyp, keyp);
+ }
+ }
losers[0].keyp = keyp;
}
@@ -1063,23 +1044,23 @@ namespace __gnu_parallel
int& source = losers[0].source;
const T* keyp = &key;
for (int pos = mapping[source] / 2; pos > 0; pos /= 2)
- {
- // The smaller one gets promoted, ties are broken by source.
- if (comp(*losers[pos].keyp, *keyp)
- || (!comp(*keyp, *losers[pos].keyp)
- && losers[pos].source < source))
- {
- // The other one is smaller.
- std::swap(losers[pos].source, source);
- std::swap(losers[pos].keyp, keyp);
- }
- }
+ {
+ // The smaller one gets promoted, ties are broken by source.
+ if (comp(*losers[pos].keyp, *keyp)
+ || (!comp(*keyp, *losers[pos].keyp)
+ && losers[pos].source < source))
+ {
+ // The other one is smaller.
+ std::swap(losers[pos].source, source);
+ std::swap(losers[pos].keyp, keyp);
+ }
+ }
losers[0].keyp = keyp;
}
};
#endif
- template<typename _ValueTp, class Comparator>
+template<typename _ValueTp, class Comparator>
struct loser_tree_traits
{
#if _GLIBCXX_LOSER_TREE
@@ -1093,7 +1074,7 @@ namespace __gnu_parallel
#endif
};
- template<typename _ValueTp, class Comparator>
+template<typename _ValueTp, class Comparator>
struct loser_tree_unguarded_traits
{
#if _GLIBCXX_LOSER_TREE_UNGUARDED
diff --git a/libstdc++-v3/include/parallel/multiway_merge.h b/libstdc++-v3/include/parallel/multiway_merge.h
index 2e3be7c..67f2f8c 100644
--- a/libstdc++-v3/include/parallel/multiway_merge.h
+++ b/libstdc++-v3/include/parallel/multiway_merge.h
@@ -29,16 +29,16 @@
// Public License.
/** @file parallel/multiway_merge.h
- * @brief Implementation of sequential and parallel multiway merge.
- *
- * Explanations on the high-speed merging routines in the appendix of
- *
- * P. Sanders.
- * Fast priority queues for cached memory.
- * ACM Journal of Experimental Algorithmics, 5, 2000.
- *
- * This file is a GNU parallel extension to the Standard C++ Library.
- */
+* @brief Implementation of sequential and parallel multiway merge.
+*
+* Explanations on the high-speed merging routines in the appendix of
+*
+* P. Sanders.
+* Fast priority queues for cached memory.
+* ACM Journal of Experimental Algorithmics, 5, 2000.
+*
+* This file is a GNU parallel extension to the Standard C++ Library.
+*/
// Written by Johannes Singler.
@@ -62,25 +62,25 @@
// XXX need iterator typedefs
namespace __gnu_parallel
{
- template<typename RandomAccessIterator, typename Comparator>
+template<typename RandomAccessIterator, typename Comparator>
class guarded_iterator;
- template<typename RandomAccessIterator, typename Comparator>
+template<typename RandomAccessIterator, typename Comparator>
inline bool
operator<(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
- guarded_iterator<RandomAccessIterator, Comparator>& bi2);
+ guarded_iterator<RandomAccessIterator, Comparator>& bi2);
- template<typename RandomAccessIterator, typename Comparator>
+template<typename RandomAccessIterator, typename Comparator>
inline bool
operator<=(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
- guarded_iterator<RandomAccessIterator, Comparator>& bi2);
+ guarded_iterator<RandomAccessIterator, Comparator>& bi2);
/** @brief Iterator wrapper supporting an implicit supremum at the end
of the sequence, dominating all comparisons.
* Deriving from RandomAccessIterator is not possible since
* RandomAccessIterator need not be a class.
*/
- template<typename RandomAccessIterator, typename Comparator>
+template<typename RandomAccessIterator, typename Comparator>
class guarded_iterator
{
private:
@@ -95,17 +95,17 @@ namespace __gnu_parallel
public:
/** @brief Constructor. Sets iterator to beginning of sequence.
- * @param begin Begin iterator of sequence.
- * @param end End iterator of sequence.
- * @param comp Comparator provided for associated overloaded
- * compare operators. */
- inline guarded_iterator(RandomAccessIterator begin,
- RandomAccessIterator end, Comparator& comp)
+ * @param begin Begin iterator of sequence.
+ * @param end End iterator of sequence.
+ * @param comp Comparator provided for associated overloaded
+ * compare operators. */
+ inline guarded_iterator(RandomAccessIterator begin,
+ RandomAccessIterator end, Comparator& comp)
: current(begin), end(end), comp(comp)
{ }
/** @brief Pre-increment operator.
- * @return This. */
+ * @return This. */
inline guarded_iterator<RandomAccessIterator, Comparator>&
operator++()
{
@@ -114,31 +114,35 @@ namespace __gnu_parallel
}
/** @brief Dereference operator.
- * @return Referenced element. */
+ * @return Referenced element. */
inline typename std::iterator_traits<RandomAccessIterator>::value_type
operator*()
{ return *current; }
/** @brief Convert to wrapped iterator.
- * @return Wrapped iterator. */
+ * @return Wrapped iterator. */
inline operator RandomAccessIterator()
{ return current; }
friend bool
- operator< <RandomAccessIterator, Comparator>(guarded_iterator<RandomAccessIterator, Comparator>& bi1, guarded_iterator<RandomAccessIterator, Comparator>& bi2);
+ operator< <RandomAccessIterator, Comparator>(
+ guarded_iterator<RandomAccessIterator, Comparator>& bi1,
+ guarded_iterator<RandomAccessIterator, Comparator>& bi2);
friend bool
- operator<= <RandomAccessIterator, Comparator>(guarded_iterator<RandomAccessIterator, Comparator>& bi1, guarded_iterator<RandomAccessIterator, Comparator>& bi2);
+ operator<= <RandomAccessIterator, Comparator>(
+ guarded_iterator<RandomAccessIterator, Comparator>& bi1,
+ guarded_iterator<RandomAccessIterator, Comparator>& bi2);
};
- /** @brief Compare two elements referenced by guarded iterators.
- * @param bi1 First iterator.
- * @param bi2 Second iterator.
- * @return @c True if less. */
- template<typename RandomAccessIterator, typename Comparator>
+/** @brief Compare two elements referenced by guarded iterators.
+ * @param bi1 First iterator.
+ * @param bi2 Second iterator.
+ * @return @c True if less. */
+template<typename RandomAccessIterator, typename Comparator>
inline bool
operator<(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
- guarded_iterator<RandomAccessIterator, Comparator>& bi2)
+ guarded_iterator<RandomAccessIterator, Comparator>& bi2)
{
if (bi1.current == bi1.end) //bi1 is sup
return bi2.current == bi2.end; //bi2 is not sup
@@ -147,14 +151,14 @@ namespace __gnu_parallel
return (bi1.comp)(*bi1, *bi2); //normal compare
}
- /** @brief Compare two elements referenced by guarded iterators.
- * @param bi1 First iterator.
- * @param bi2 Second iterator.
- * @return @c True if less equal. */
- template<typename RandomAccessIterator, typename Comparator>
+/** @brief Compare two elements referenced by guarded iterators.
+ * @param bi1 First iterator.
+ * @param bi2 Second iterator.
+ * @return @c True if less equal. */
+template<typename RandomAccessIterator, typename Comparator>
inline bool
operator<=(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
- guarded_iterator<RandomAccessIterator, Comparator>& bi2)
+ guarded_iterator<RandomAccessIterator, Comparator>& bi2)
{
if (bi2.current == bi2.end) //bi1 is sup
return bi1.current != bi1.end; //bi2 is not sup
@@ -163,20 +167,20 @@ namespace __gnu_parallel
return !(bi1.comp)(*bi2, *bi1); //normal compare
}
- template<typename RandomAccessIterator, typename Comparator>
+template<typename RandomAccessIterator, typename Comparator>
class unguarded_iterator;
- template<typename RandomAccessIterator, typename Comparator>
+template<typename RandomAccessIterator, typename Comparator>
inline bool
operator<(unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
- unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
+ unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
- template<typename RandomAccessIterator, typename Comparator>
+template<typename RandomAccessIterator, typename Comparator>
inline bool
operator<=(unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
- unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
+ unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
- template<typename RandomAccessIterator, typename Comparator>
+template<typename RandomAccessIterator, typename Comparator>
class unguarded_iterator
{
private:
@@ -187,16 +191,16 @@ namespace __gnu_parallel
public:
/** @brief Constructor. Sets iterator to beginning of sequence.
- * @param begin Begin iterator of sequence.
- * @param end Unused, only for compatibility.
- * @param comp Unused, only for compatibility. */
- inline unguarded_iterator(RandomAccessIterator begin,
- RandomAccessIterator end, Comparator& comp)
+ * @param begin Begin iterator of sequence.
+ * @param end Unused, only for compatibility.
+ * @param comp Unused, only for compatibility. */
+ inline unguarded_iterator(RandomAccessIterator begin,
+ RandomAccessIterator end, Comparator& comp)
: current(begin), comp(comp)
{ }
/** @brief Pre-increment operator.
- * @return This. */
+ * @return This. */
inline unguarded_iterator<RandomAccessIterator, Comparator>&
operator++()
{
@@ -205,77 +209,85 @@ namespace __gnu_parallel
}
/** @brief Dereference operator.
- * @return Referenced element. */
- inline typename std::iterator_traits<RandomAccessIterator>::value_type
+ * @return Referenced element. */
+ inline typename std::iterator_traits<RandomAccessIterator>::value_type
operator*()
{ return *current; }
/** @brief Convert to wrapped iterator.
- * @return Wrapped iterator. */
+ * @return Wrapped iterator. */
inline
operator RandomAccessIterator()
{ return current; }
friend bool
- operator< <RandomAccessIterator, Comparator>(unguarded_iterator<RandomAccessIterator, Comparator>& bi1, unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
+ operator< <RandomAccessIterator, Comparator>(
+ unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
+ unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
friend bool
- operator<= <RandomAccessIterator, Comparator>(unguarded_iterator<RandomAccessIterator, Comparator>& bi1, unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
+ operator<= <RandomAccessIterator, Comparator>(
+ unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
+ unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
};
- /** @brief Compare two elements referenced by unguarded iterators.
- * @param bi1 First iterator.
- * @param bi2 Second iterator.
- * @return @c True if less. */
- template<typename RandomAccessIterator, typename Comparator>
+/** @brief Compare two elements referenced by unguarded iterators.
+ * @param bi1 First iterator.
+ * @param bi2 Second iterator.
+ * @return @c True if less. */
+template<typename RandomAccessIterator, typename Comparator>
inline bool
operator<(unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
- unguarded_iterator<RandomAccessIterator, Comparator>& bi2)
+ unguarded_iterator<RandomAccessIterator, Comparator>& bi2)
{
// Normal compare.
return (bi1.comp)(*bi1, *bi2);
}
- /** @brief Compare two elements referenced by unguarded iterators.
- * @param bi1 First iterator.
- * @param bi2 Second iterator.
- * @return @c True if less equal. */
- template<typename RandomAccessIterator, typename Comparator>
+/** @brief Compare two elements referenced by unguarded iterators.
+ * @param bi1 First iterator.
+ * @param bi2 Second iterator.
+ * @return @c True if less equal. */
+template<typename RandomAccessIterator, typename Comparator>
inline bool
operator<=(unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
- unguarded_iterator<RandomAccessIterator, Comparator>& bi2)
+ unguarded_iterator<RandomAccessIterator, Comparator>& bi2)
{
// Normal compare.
return !(bi1.comp)(*bi2, *bi1);
}
- /** Prepare a set of sequences to be merged without a (end) guard
- * @param seqs_begin
- * @param seqs_end
- * @param comp
- * @param min_sequence
- * @param stable
- * @pre (seqs_end - seqs_begin > 0) */
- template<typename RandomAccessIteratorIterator, typename Comparator>
- typename std::iterator_traits<typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type>::difference_type
+/** Prepare a set of sequences to be merged without a (end) guard
+ * @param seqs_begin
+ * @param seqs_end
+ * @param comp
+ * @param min_sequence
+ * @param stable
+ * @pre (seqs_end - seqs_begin > 0) */
+template<typename RandomAccessIteratorIterator, typename Comparator>
+ typename std::iterator_traits<
+ typename std::iterator_traits<RandomAccessIteratorIterator>::value_type
+ ::first_type>::difference_type
prepare_unguarded(RandomAccessIteratorIterator seqs_begin,
- RandomAccessIteratorIterator seqs_end, Comparator comp,
- int& min_sequence, bool stable)
+ RandomAccessIteratorIterator seqs_end, Comparator comp,
+ int& min_sequence, bool stable)
{
_GLIBCXX_CALL(seqs_end - seqs_begin)
- typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
+ typedef typename std::iterator_traits<RandomAccessIteratorIterator>
+ ::value_type::first_type
RandomAccessIterator1;
typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
value_type;
- typedef typename std::iterator_traits<RandomAccessIterator1>::difference_type
+ typedef typename std::iterator_traits<RandomAccessIterator1>
+ ::difference_type
difference_type;
if ((*seqs_begin).first == (*seqs_begin).second)
{
- // Empty sequence found, it's the first one.
- min_sequence = 0;
- return -1;
+ // Empty sequence found, it's the first one.
+ min_sequence = 0;
+ return -1;
}
// Last element in sequence.
@@ -283,20 +295,20 @@ namespace __gnu_parallel
min_sequence = 0;
for (RandomAccessIteratorIterator s = seqs_begin + 1; s != seqs_end; s++)
{
- if ((*s).first == (*s).second)
- {
- // Empty sequence found.
- min_sequence = static_cast<int>(s - seqs_begin);
- return -1;
- }
-
- // Last element in sequence.
- const value_type& v = *((*s).second - 1);
- if (comp(v, min)) //strictly smaller
- {
- min = v;
- min_sequence = static_cast<int>(s - seqs_begin);
- }
+ if ((*s).first == (*s).second)
+ {
+ // Empty sequence found.
+ min_sequence = static_cast<int>(s - seqs_begin);
+ return -1;
+ }
+
+ // Last element in sequence.
+ const value_type& v = *((*s).second - 1);
+ if (comp(v, min)) //strictly smaller
+ {
+ min = v;
+ min_sequence = static_cast<int>(s - seqs_begin);
+ }
}
difference_type overhang_size = 0;
@@ -304,93 +316,108 @@ namespace __gnu_parallel
int s = 0;
for (s = 0; s <= min_sequence; s++)
{
- RandomAccessIterator1 split;
- if (stable)
- split = std::upper_bound(seqs_begin[s].first, seqs_begin[s].second,
- min, comp);
- else
- split = std::lower_bound(seqs_begin[s].first, seqs_begin[s].second,
- min, comp);
-
- overhang_size += seqs_begin[s].second - split;
+ RandomAccessIterator1 split;
+ if (stable)
+ split = std::upper_bound(seqs_begin[s].first, seqs_begin[s].second,
+ min, comp);
+ else
+ split = std::lower_bound(seqs_begin[s].first, seqs_begin[s].second,
+ min, comp);
+
+ overhang_size += seqs_begin[s].second - split;
}
for (; s < (seqs_end - seqs_begin); s++)
{
- RandomAccessIterator1 split = std::lower_bound(seqs_begin[s].first, seqs_begin[s].second, min, comp);
- overhang_size += seqs_begin[s].second - split;
+ RandomAccessIterator1 split = std::lower_bound(
+ seqs_begin[s].first, seqs_begin[s].second, min, comp);
+ overhang_size += seqs_begin[s].second - split;
}
// So many elements will be left over afterwards.
return overhang_size;
}
- /** Prepare a set of sequences to be merged with a (end) guard (sentinel)
- * @param seqs_begin
- * @param seqs_end
- * @param comp */
- template<typename RandomAccessIteratorIterator, typename Comparator>
- typename std::iterator_traits<typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type>::difference_type
+/** Prepare a set of sequences to be merged with a (end) guard (sentinel)
+ * @param seqs_begin
+ * @param seqs_end
+ * @param comp */
+template<typename RandomAccessIteratorIterator, typename Comparator>
+ typename std::iterator_traits<typename std::iterator_traits<
+ RandomAccessIteratorIterator>::value_type::first_type>::difference_type
prepare_unguarded_sentinel(RandomAccessIteratorIterator seqs_begin,
- RandomAccessIteratorIterator seqs_end,
- Comparator comp)
+ RandomAccessIteratorIterator seqs_end,
+ Comparator comp)
{
_GLIBCXX_CALL(seqs_end - seqs_begin)
- typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
+ typedef typename std::iterator_traits<RandomAccessIteratorIterator>
+ ::value_type::first_type
RandomAccessIterator1;
- typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
+ typedef typename std::iterator_traits<RandomAccessIterator1>
+ ::value_type
value_type;
- typedef typename std::iterator_traits<RandomAccessIterator1>::difference_type
+ typedef typename std::iterator_traits<RandomAccessIterator1>
+ ::difference_type
difference_type;
// Last element in sequence.
value_type* max = NULL;
for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++)
{
- if ((*s).first == (*s).second)
- continue;
+ if ((*s).first == (*s).second)
+ continue;
- // Last element in sequence.
- value_type& v = *((*s).second - 1);
+ // Last element in sequence.
+ value_type& v = *((*s).second - 1);
- // Strictly greater.
- if (!max || comp(*max, v))
- max = &v;
+ // Strictly greater.
+ if (!max || comp(*max, v))
+ max = &v;
}
difference_type overhang_size = 0;
for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++)
{
- RandomAccessIterator1 split = std::lower_bound((*s).first, (*s).second,
- *max, comp);
- overhang_size += (*s).second - split;
+ RandomAccessIterator1 split =
+ std::lower_bound((*s).first, (*s).second, *max, comp);
+ overhang_size += (*s).second - split;
- // Set sentinel.
- *((*s).second) = *max;
+ // Set sentinel.
+ *((*s).second) = *max;
}
// So many elements will be left over afterwards.
return overhang_size;
}
- /** @brief Highly efficient 3-way merging procedure.
- * @param seqs_begin Begin iterator of iterator pair input sequence.
- * @param seqs_end End iterator of iterator pair input sequence.
- * @param target Begin iterator out output sequence.
- * @param comp Comparator.
- * @param length Maximum length to merge.
- * @param stable Unused, stable anyway.
- * @return End iterator of output sequence. */
- template<template<typename RAI, typename C> class iterator, typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
+/** @brief Highly efficient 3-way merging procedure.
+ * @param seqs_begin Begin iterator of iterator pair input sequence.
+ * @param seqs_end End iterator of iterator pair input sequence.
+ * @param target Begin iterator out output sequence.
+ * @param comp Comparator.
+ * @param length Maximum length to merge.
+ * @param stable Unused, stable anyway.
+ * @return End iterator of output sequence. */
+template<
+ template<typename RAI, typename C> class iterator,
+ typename RandomAccessIteratorIterator,
+ typename RandomAccessIterator3,
+ typename _DifferenceTp,
+ typename Comparator>
RandomAccessIterator3
- multiway_merge_3_variant(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
+ multiway_merge_3_variant(
+ RandomAccessIteratorIterator seqs_begin,
+ RandomAccessIteratorIterator seqs_end,
+ RandomAccessIterator3 target,
+ Comparator comp, _DifferenceTp length, bool stable)
{
_GLIBCXX_CALL(length);
-
+
typedef _DifferenceTp difference_type;
- typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
+ typedef typename std::iterator_traits<RandomAccessIteratorIterator>
+ ::value_type::first_type
RandomAccessIterator1;
typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
value_type;
@@ -405,25 +432,25 @@ namespace __gnu_parallel
if (seq0 <= seq1)
{
- if (seq1 <= seq2)
- goto s012;
- else
- if (seq2 < seq0)
- goto s201;
- else
- goto s021;
+ if (seq1 <= seq2)
+ goto s012;
+ else
+ if (seq2 < seq0)
+ goto s201;
+ else
+ goto s021;
}
else
{
- if (seq1 <= seq2)
- {
- if (seq0 <= seq2)
- goto s102;
- else
- goto s120;
- }
- else
- goto s210;
+ if (seq1 <= seq2)
+ {
+ if (seq0 <= seq2)
+ goto s102;
+ else
+ goto s120;
+ }
+ else
+ goto s210;
}
#define Merge3Case(a,b,c,c0,c1) \
@@ -456,14 +483,23 @@ namespace __gnu_parallel
return target;
}
- template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
+template<
+ typename RandomAccessIteratorIterator,
+ typename RandomAccessIterator3,
+ typename _DifferenceTp,
+ typename Comparator>
RandomAccessIterator3
- multiway_merge_3_combined(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
+ multiway_merge_3_combined(RandomAccessIteratorIterator seqs_begin,
+ RandomAccessIteratorIterator seqs_end,
+ RandomAccessIterator3 target,
+ Comparator comp,
+ _DifferenceTp length, bool stable)
{
_GLIBCXX_CALL(length);
-
+
typedef _DifferenceTp difference_type;
- typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
+ typedef typename std::iterator_traits<RandomAccessIteratorIterator>
+ ::value_type::first_type
RandomAccessIterator1;
typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
value_type;
@@ -472,7 +508,8 @@ namespace __gnu_parallel
RandomAccessIterator3 target_end;
// Stable anyway.
- difference_type overhang = prepare_unguarded(seqs_begin, seqs_end, comp, min_seq, true);
+ difference_type overhang =
+ prepare_unguarded(seqs_begin, seqs_end, comp, min_seq, true);
difference_type total_length = 0;
for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
@@ -480,16 +517,17 @@ namespace __gnu_parallel
if (overhang != -1)
{
- difference_type unguarded_length = std::min(length, total_length - overhang);
- target_end = multiway_merge_3_variant<unguarded_iterator>
- (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
- overhang = length - unguarded_length;
+ difference_type unguarded_length =
+ std::min(length, total_length - overhang);
+ target_end = multiway_merge_3_variant<unguarded_iterator>
+ (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
+ overhang = length - unguarded_length;
}
else
{
- // Empty sequence found.
- overhang = length;
- target_end = target;
+ // Empty sequence found.
+ overhang = length;
+ target_end = target;
}
#if _GLIBCXX_ASSERTIONS
@@ -500,23 +538,23 @@ namespace __gnu_parallel
switch (min_seq)
{
case 0:
- // Iterators will be advanced accordingly.
- target_end = merge_advance(seqs_begin[1].first, seqs_begin[1].second,
- seqs_begin[2].first, seqs_begin[2].second,
- target_end, overhang, comp);
- break;
+ // Iterators will be advanced accordingly.
+ target_end = merge_advance(seqs_begin[1].first, seqs_begin[1].second,
+ seqs_begin[2].first, seqs_begin[2].second,
+ target_end, overhang, comp);
+ break;
case 1:
- target_end = merge_advance(seqs_begin[0].first, seqs_begin[0].second,
- seqs_begin[2].first, seqs_begin[2].second,
- target_end, overhang, comp);
- break;
+ target_end = merge_advance(seqs_begin[0].first, seqs_begin[0].second,
+ seqs_begin[2].first, seqs_begin[2].second,
+ target_end, overhang, comp);
+ break;
case 2:
- target_end = merge_advance(seqs_begin[0].first, seqs_begin[0].second,
- seqs_begin[1].first, seqs_begin[1].second,
- target_end, overhang, comp);
- break;
+ target_end = merge_advance(seqs_begin[0].first, seqs_begin[0].second,
+ seqs_begin[1].first, seqs_begin[1].second,
+ target_end, overhang, comp);
+ break;
default:
- _GLIBCXX_PARALLEL_ASSERT(false);
+ _GLIBCXX_PARALLEL_ASSERT(false);
}
#if _GLIBCXX_ASSERTIONS
@@ -527,22 +565,31 @@ namespace __gnu_parallel
return target_end;
}
- /** @brief Highly efficient 4-way merging procedure.
- * @param seqs_begin Begin iterator of iterator pair input sequence.
- * @param seqs_end End iterator of iterator pair input sequence.
- * @param target Begin iterator out output sequence.
- * @param comp Comparator.
- * @param length Maximum length to merge.
- * @param stable Unused, stable anyway.
- * @return End iterator of output sequence. */
- template<template<typename RAI, typename C> class iterator, typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
+/** @brief Highly efficient 4-way merging procedure.
+ * @param seqs_begin Begin iterator of iterator pair input sequence.
+ * @param seqs_end End iterator of iterator pair input sequence.
+ * @param target Begin iterator out output sequence.
+ * @param comp Comparator.
+ * @param length Maximum length to merge.
+ * @param stable Unused, stable anyway.
+ * @return End iterator of output sequence. */
+template<
+ template<typename RAI, typename C> class iterator,
+ typename RandomAccessIteratorIterator,
+ typename RandomAccessIterator3,
+ typename _DifferenceTp,
+ typename Comparator>
RandomAccessIterator3
- multiway_merge_4_variant(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
+ multiway_merge_4_variant(RandomAccessIteratorIterator seqs_begin,
+ RandomAccessIteratorIterator seqs_end,
+ RandomAccessIterator3 target,
+ Comparator comp, _DifferenceTp length, bool stable)
{
_GLIBCXX_CALL(length);
typedef _DifferenceTp difference_type;
- typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
+ typedef typename std::iterator_traits<RandomAccessIteratorIterator>
+ ::value_type::first_type
RandomAccessIterator1;
typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
value_type;
@@ -561,26 +608,26 @@ namespace __gnu_parallel
if (seq0 <= seq1)
{
- if (seq1 <= seq2)
- Decision(0,1,2,3)
- else
- if (seq2 < seq0)
- Decision(2,0,1,3)
- else
- Decision(0,2,1,3)
- }
+ if (seq1 <= seq2)
+ Decision(0,1,2,3)
+ else
+ if (seq2 < seq0)
+ Decision(2,0,1,3)
+ else
+ Decision(0,2,1,3)
+ }
else
{
- if (seq1 <= seq2)
- {
- if (seq0 <= seq2)
- Decision(1,0,2,3)
- else
- Decision(1,2,0,3)
- }
- else
- Decision(2,1,0,3)
- }
+ if (seq1 <= seq2)
+ {
+ if (seq0 <= seq2)
+ Decision(1,0,2,3)
+ else
+ Decision(1,2,0,3)
+ }
+ else
+ Decision(2,1,0,3)
+ }
#define Merge4Case(a,b,c,d,c0,c1,c2) \
s ## a ## b ## c ## d: \
@@ -633,14 +680,23 @@ namespace __gnu_parallel
return target;
}
- template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
+template<
+ typename RandomAccessIteratorIterator,
+ typename RandomAccessIterator3,
+ typename _DifferenceTp,
+ typename Comparator>
RandomAccessIterator3
- multiway_merge_4_combined(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
+ multiway_merge_4_combined(RandomAccessIteratorIterator seqs_begin,
+ RandomAccessIteratorIterator seqs_end,
+ RandomAccessIterator3 target,
+ Comparator comp,
+ _DifferenceTp length, bool stable)
{
_GLIBCXX_CALL(length);
typedef _DifferenceTp difference_type;
- typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
+ typedef typename std::iterator_traits<RandomAccessIteratorIterator>
+ ::value_type::first_type
RandomAccessIterator1;
typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
value_type;
@@ -649,7 +705,8 @@ namespace __gnu_parallel
RandomAccessIterator3 target_end;
// Stable anyway.
- difference_type overhang = prepare_unguarded(seqs_begin, seqs_end, comp, min_seq, true);
+ difference_type overhang =
+ prepare_unguarded(seqs_begin, seqs_end, comp, min_seq, true);
difference_type total_length = 0;
for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
@@ -657,16 +714,17 @@ namespace __gnu_parallel
if (overhang != -1)
{
- difference_type unguarded_length = std::min(length, total_length - overhang);
- target_end = multiway_merge_4_variant<unguarded_iterator>
- (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
- overhang = length - unguarded_length;
+ difference_type unguarded_length =
+ std::min(length, total_length - overhang);
+ target_end = multiway_merge_4_variant<unguarded_iterator>
+ (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
+ overhang = length - unguarded_length;
}
else
{
- // Empty sequence found.
- overhang = length;
- target_end = target;
+ // Empty sequence found.
+ overhang = length;
+ target_end = target;
}
#if _GLIBCXX_ASSERTIONS
@@ -674,10 +732,13 @@ namespace __gnu_parallel
_GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
#endif
- std::vector<std::pair<RandomAccessIterator1, RandomAccessIterator1> > one_missing(seqs_begin, seqs_end);
+ std::vector<std::pair<RandomAccessIterator1, RandomAccessIterator1> >
+ one_missing(seqs_begin, seqs_end);
one_missing.erase(one_missing.begin() + min_seq); //remove
- target_end = multiway_merge_3_variant<guarded_iterator>(one_missing.begin(), one_missing.end(), target_end, comp, overhang, stable);
+ target_end = multiway_merge_3_variant<guarded_iterator>(
+ one_missing.begin(), one_missing.end(),
+ target_end, comp, overhang, stable);
// Insert back again.
one_missing.insert(one_missing.begin() + min_seq, seqs_begin[min_seq]);
@@ -692,26 +753,34 @@ namespace __gnu_parallel
return target_end;
}
- /** @brief Basic multi-way merging procedure.
- *
- * The head elements are kept in a sorted array, new heads are
- * inserted linearly.
- * @param seqs_begin Begin iterator of iterator pair input sequence.
- * @param seqs_end End iterator of iterator pair input sequence.
- * @param target Begin iterator out output sequence.
- * @param comp Comparator.
- * @param length Maximum length to merge.
- * @param stable Stable merging incurs a performance penalty.
- * @return End iterator of output sequence.
- */
- template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
+/** @brief Basic multi-way merging procedure.
+ *
+ * The head elements are kept in a sorted array, new heads are
+ * inserted linearly.
+ * @param seqs_begin Begin iterator of iterator pair input sequence.
+ * @param seqs_end End iterator of iterator pair input sequence.
+ * @param target Begin iterator out output sequence.
+ * @param comp Comparator.
+ * @param length Maximum length to merge.
+ * @param stable Stable merging incurs a performance penalty.
+ * @return End iterator of output sequence.
+ */
+template<
+ typename RandomAccessIteratorIterator,
+ typename RandomAccessIterator3,
+ typename _DifferenceTp,
+ typename Comparator>
RandomAccessIterator3
- multiway_merge_bubble(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
+ multiway_merge_bubble(RandomAccessIteratorIterator seqs_begin,
+ RandomAccessIteratorIterator seqs_end,
+ RandomAccessIterator3 target,
+ Comparator comp, _DifferenceTp length, bool stable)
{
_GLIBCXX_CALL(length)
typedef _DifferenceTp difference_type;
- typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
+ typedef typename std::iterator_traits<RandomAccessIteratorIterator>
+ ::value_type::first_type
RandomAccessIterator1;
typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
value_type;
@@ -719,7 +788,8 @@ namespace __gnu_parallel
// Num remaining pieces.
int k = static_cast<int>(seqs_end - seqs_begin), nrp;
- value_type* pl = static_cast<value_type*>(::operator new(sizeof(value_type) * k));
+ value_type* pl = static_cast<value_type*>(
+ ::operator new(sizeof(value_type) * k));
int* source = new int[k];
difference_type total_length = 0;
@@ -730,137 +800,138 @@ namespace __gnu_parallel
nrp = 0;
for (int pi = 0; pi < k; pi++)
{
- if (STOPS(pi) != POS(pi))
- {
- pl[nrp] = *(POS(pi));
- source[nrp] = pi;
- nrp++;
- total_length += LENGTH(seqs_begin[pi]);
- }
+ if (STOPS(pi) != POS(pi))
+ {
+ pl[nrp] = *(POS(pi));
+ source[nrp] = pi;
+ nrp++;
+ total_length += LENGTH(seqs_begin[pi]);
+ }
}
if (stable)
{
- for (int k = 0; k < nrp - 1; k++)
- for (int pi = nrp - 1; pi > k; pi--)
- if (comp(pl[pi], pl[pi - 1]) ||
- (!comp(pl[pi - 1], pl[pi]) && source[pi] < source[pi - 1]))
- {
- std::swap(pl[pi - 1], pl[pi]);
- std::swap(source[pi - 1], source[pi]);
- }
+ for (int k = 0; k < nrp - 1; k++)
+ for (int pi = nrp - 1; pi > k; pi--)
+ if (comp(pl[pi], pl[pi - 1]) ||
+ (!comp(pl[pi - 1], pl[pi]) && source[pi] < source[pi - 1]))
+ {
+ std::swap(pl[pi - 1], pl[pi]);
+ std::swap(source[pi - 1], source[pi]);
+ }
}
else
{
- for (int k = 0; k < nrp - 1; k++)
- for (int pi = nrp - 1; pi > k; pi--)
- if (comp(pl[pi], pl[pi-1]))
- {
- std::swap(pl[pi-1], pl[pi]);
- std::swap(source[pi-1], source[pi]);
- }
+ for (int k = 0; k < nrp - 1; k++)
+ for (int pi = nrp - 1; pi > k; pi--)
+ if (comp(pl[pi], pl[pi-1]))
+ {
+ std::swap(pl[pi-1], pl[pi]);
+ std::swap(source[pi-1], source[pi]);
+ }
}
// Iterate.
if (stable)
{
- int j;
- while (nrp > 0 && length > 0)
- {
- if (source[0] < source[1])
- {
- // pl[0] <= pl[1]
- while ((nrp == 1 || !(comp(pl[1], pl[0]))) && length > 0)
- {
- *target = pl[0];
- ++target;
- ++POS(source[0]);
- length--;
- if (POS(source[0]) == STOPS(source[0]))
- {
- // Move everything to the left.
- for (int s = 0; s < nrp - 1; s++)
- {
- pl[s] = pl[s + 1];
- source[s] = source[s + 1];
- }
- nrp--;
- break;
- }
- else
- pl[0] = *(POS(source[0]));
- }
- }
- else
- {
- // pl[0] < pl[1]
- while ((nrp == 1 || comp(pl[0], pl[1])) && length > 0)
- {
- *target = pl[0];
- ++target;
- ++POS(source[0]);
- length--;
- if (POS(source[0]) == STOPS(source[0]))
- {
- for (int s = 0; s < nrp - 1; s++)
- {
- pl[s] = pl[s + 1];
- source[s] = source[s + 1];
- }
- nrp--;
- break;
- }
- else
- pl[0] = *(POS(source[0]));
- }
- }
-
- // Sink down.
- j = 1;
- while ((j < nrp) && (comp(pl[j], pl[j - 1]) ||
- (!comp(pl[j - 1], pl[j]) && (source[j] < source[j - 1]))))
- {
- std::swap(pl[j - 1], pl[j]);
- std::swap(source[j - 1], source[j]);
- j++;
- }
- }
+ int j;
+ while (nrp > 0 && length > 0)
+ {
+ if (source[0] < source[1])
+ {
+ // pl[0] <= pl[1]
+ while ((nrp == 1 || !(comp(pl[1], pl[0]))) && length > 0)
+ {
+ *target = pl[0];
+ ++target;
+ ++POS(source[0]);
+ length--;
+ if (POS(source[0]) == STOPS(source[0]))
+ {
+ // Move everything to the left.
+ for (int s = 0; s < nrp - 1; s++)
+ {
+ pl[s] = pl[s + 1];
+ source[s] = source[s + 1];
+ }
+ nrp--;
+ break;
+ }
+ else
+ pl[0] = *(POS(source[0]));
+ }
+ }
+ else
+ {
+ // pl[0] < pl[1]
+ while ((nrp == 1 || comp(pl[0], pl[1])) && length > 0)
+ {
+ *target = pl[0];
+ ++target;
+ ++POS(source[0]);
+ length--;
+ if (POS(source[0]) == STOPS(source[0]))
+ {
+ for (int s = 0; s < nrp - 1; s++)
+ {
+ pl[s] = pl[s + 1];
+ source[s] = source[s + 1];
+ }
+ nrp--;
+ break;
+ }
+ else
+ pl[0] = *(POS(source[0]));
+ }
+ }
+
+ // Sink down.
+ j = 1;
+ while ((j < nrp) && (comp(pl[j], pl[j - 1]) ||
+ (!comp(pl[j - 1], pl[j])
+ && (source[j] < source[j - 1]))))
+ {
+ std::swap(pl[j - 1], pl[j]);
+ std::swap(source[j - 1], source[j]);
+ j++;
+ }
+ }
}
else
{
- int j;
- while (nrp > 0 && length > 0)
- {
- // pl[0] <= pl[1]
- while (nrp == 1 || (!comp(pl[1], pl[0])) && length > 0)
- {
- *target = pl[0];
- ++target;
- ++POS(source[0]);
- length--;
- if (POS(source[0]) == STOPS(source[0]))
- {
- for (int s = 0; s < (nrp - 1); s++)
- {
- pl[s] = pl[s + 1];
- source[s] = source[s + 1];
- }
- nrp--;
- break;
- }
- else
- pl[0] = *(POS(source[0]));
- }
-
- // Sink down.
- j = 1;
- while ((j < nrp) && comp(pl[j], pl[j - 1]))
- {
- std::swap(pl[j - 1], pl[j]);
- std::swap(source[j - 1], source[j]);
- j++;
- }
- }
+ int j;
+ while (nrp > 0 && length > 0)
+ {
+ // pl[0] <= pl[1]
+ while (nrp == 1 || (!comp(pl[1], pl[0])) && length > 0)
+ {
+ *target = pl[0];
+ ++target;
+ ++POS(source[0]);
+ length--;
+ if (POS(source[0]) == STOPS(source[0]))
+ {
+ for (int s = 0; s < (nrp - 1); s++)
+ {
+ pl[s] = pl[s + 1];
+ source[s] = source[s + 1];
+ }
+ nrp--;
+ break;
+ }
+ else
+ pl[0] = *(POS(source[0]));
+ }
+
+ // Sink down.
+ j = 1;
+ while ((j < nrp) && comp(pl[j], pl[j - 1]))
+ {
+ std::swap(pl[j - 1], pl[j]);
+ std::swap(source[j - 1], source[j]);
+ j++;
+ }
+ }
}
delete[] pl;
@@ -869,26 +940,36 @@ namespace __gnu_parallel
return target;
}
- /** @brief Multi-way merging procedure for a high branching factor,
- * guarded case.
- *
- * The head elements are kept in a loser tree.
- * @param seqs_begin Begin iterator of iterator pair input sequence.
- * @param seqs_end End iterator of iterator pair input sequence.
- * @param target Begin iterator out output sequence.
- * @param comp Comparator.
- * @param length Maximum length to merge.
- * @param stable Stable merging incurs a performance penalty.
- * @return End iterator of output sequence.
- */
- template<typename LT, typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
+/** @brief Multi-way merging procedure for a high branching factor,
+ * guarded case.
+ *
+ * The head elements are kept in a loser tree.
+ * @param seqs_begin Begin iterator of iterator pair input sequence.
+ * @param seqs_end End iterator of iterator pair input sequence.
+ * @param target Begin iterator out output sequence.
+ * @param comp Comparator.
+ * @param length Maximum length to merge.
+ * @param stable Stable merging incurs a performance penalty.
+ * @return End iterator of output sequence.
+ */
+template<
+ typename LT,
+ typename RandomAccessIteratorIterator,
+ typename RandomAccessIterator3,
+ typename _DifferenceTp,
+ typename Comparator>
RandomAccessIterator3
- multiway_merge_loser_tree(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
+ multiway_merge_loser_tree(RandomAccessIteratorIterator seqs_begin,
+ RandomAccessIteratorIterator seqs_end,
+ RandomAccessIterator3 target,
+ Comparator comp,
+ _DifferenceTp length, bool stable)
{
_GLIBCXX_CALL(length)
typedef _DifferenceTp difference_type;
- typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
+ typedef typename std::iterator_traits<RandomAccessIteratorIterator>
+ ::value_type::first_type
RandomAccessIterator1;
typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
value_type;
@@ -941,64 +1022,73 @@ namespace __gnu_parallel
if (stable)
{
- for (difference_type i = 0; i < total_length; i++)
- {
- // Take out.
- source = lt.get_min_source();
+ for (difference_type i = 0; i < total_length; i++)
+ {
+ // Take out.
+ source = lt.get_min_source();
- *(target++) = *(seqs_begin[source].first++);
+ *(target++) = *(seqs_begin[source].first++);
- // Feed.
- if (seqs_begin[source].first == seqs_begin[source].second)
- lt.delete_min_insert_stable(*arbitrary_element, true);
- else
- // Replace from same source.
- lt.delete_min_insert_stable(*seqs_begin[source].first, false);
+ // Feed.
+ if (seqs_begin[source].first == seqs_begin[source].second)
+ lt.delete_min_insert_stable(*arbitrary_element, true);
+ else
+ // Replace from same source.
+ lt.delete_min_insert_stable(*seqs_begin[source].first, false);
- }
+ }
}
else
{
- for (difference_type i = 0; i < total_length; i++)
- {
- //take out
- source = lt.get_min_source();
-
- *(target++) = *(seqs_begin[source].first++);
-
- // Feed.
- if (seqs_begin[source].first == seqs_begin[source].second)
- lt.delete_min_insert(*arbitrary_element, true);
- else
- // Replace from same source.
- lt.delete_min_insert(*seqs_begin[source].first, false);
- }
+ for (difference_type i = 0; i < total_length; i++)
+ {
+ //take out
+ source = lt.get_min_source();
+
+ *(target++) = *(seqs_begin[source].first++);
+
+ // Feed.
+ if (seqs_begin[source].first == seqs_begin[source].second)
+ lt.delete_min_insert(*arbitrary_element, true);
+ else
+ // Replace from same source.
+ lt.delete_min_insert(*seqs_begin[source].first, false);
+ }
}
return target;
}
- /** @brief Multi-way merging procedure for a high branching factor,
- * unguarded case.
- *
- * The head elements are kept in a loser tree.
- * @param seqs_begin Begin iterator of iterator pair input sequence.
- * @param seqs_end End iterator of iterator pair input sequence.
- * @param target Begin iterator out output sequence.
- * @param comp Comparator.
- * @param length Maximum length to merge.
- * @param stable Stable merging incurs a performance penalty.
- * @return End iterator of output sequence.
- * @pre No input will run out of elements during the merge.
- */
- template<typename LT, typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
+/** @brief Multi-way merging procedure for a high branching factor,
+ * unguarded case.
+ *
+ * The head elements are kept in a loser tree.
+ * @param seqs_begin Begin iterator of iterator pair input sequence.
+ * @param seqs_end End iterator of iterator pair input sequence.
+ * @param target Begin iterator out output sequence.
+ * @param comp Comparator.
+ * @param length Maximum length to merge.
+ * @param stable Stable merging incurs a performance penalty.
+ * @return End iterator of output sequence.
+ * @pre No input will run out of elements during the merge.
+ */
+template<
+ typename LT,
+ typename RandomAccessIteratorIterator,
+ typename RandomAccessIterator3,
+ typename _DifferenceTp, typename Comparator>
RandomAccessIterator3
- multiway_merge_loser_tree_unguarded(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
+ multiway_merge_loser_tree_unguarded(RandomAccessIteratorIterator seqs_begin,
+ RandomAccessIteratorIterator seqs_end,
+ RandomAccessIterator3 target,
+ Comparator comp,
+ _DifferenceTp length, bool stable)
{
_GLIBCXX_CALL(length)
typedef _DifferenceTp difference_type;
- typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
+ typedef typename std::iterator_traits<RandomAccessIteratorIterator>
+ ::value_type::first_type
RandomAccessIterator1;
typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
value_type;
@@ -1012,14 +1102,14 @@ namespace __gnu_parallel
for (int t = 0; t < k; t++)
{
#if _GLIBCXX_ASSERTIONS
- _GLIBCXX_PARALLEL_ASSERT(seqs_begin[t].first != seqs_begin[t].second);
+ _GLIBCXX_PARALLEL_ASSERT(seqs_begin[t].first != seqs_begin[t].second);
#endif
- if (stable)
- lt.insert_start_stable(*seqs_begin[t].first, t, false);
- else
- lt.insert_start(*seqs_begin[t].first, t, false);
+ if (stable)
+ lt.insert_start_stable(*seqs_begin[t].first, t, false);
+ else
+ lt.insert_start(*seqs_begin[t].first, t, false);
- total_length += LENGTH(seqs_begin[t]);
+ total_length += LENGTH(seqs_begin[t]);
}
if (stable)
@@ -1038,68 +1128,84 @@ namespace __gnu_parallel
if (stable)
{
- RandomAccessIterator3 target_end = target + length;
- while (target < target_end)
- {
- // Take out.
- source = lt.get_min_source();
+ RandomAccessIterator3 target_end = target + length;
+ while (target < target_end)
+ {
+ // Take out.
+ source = lt.get_min_source();
#if _GLIBCXX_ASSERTIONS
- _GLIBCXX_PARALLEL_ASSERT(i == 0 || !comp(*(seqs_begin[source].first), *(target - 1)));
+ _GLIBCXX_PARALLEL_ASSERT(i == 0
+ || !comp(*(seqs_begin[source].first), *(target - 1)));
#endif
- *(target++) = *(seqs_begin[source].first++);
+ *(target++) = *(seqs_begin[source].first++);
#if _GLIBCXX_ASSERTIONS
- _GLIBCXX_PARALLEL_ASSERT((seqs_begin[source].first != seqs_begin[source].second) || (i == length - 1));
- i++;
+ _GLIBCXX_PARALLEL_ASSERT(
+ (seqs_begin[source].first != seqs_begin[source].second)
+ || (i == length - 1));
+ i++;
#endif
- // Feed.
- // Replace from same source.
- lt.delete_min_insert_stable(*seqs_begin[source].first, false);
+ // Feed.
+ // Replace from same source.
+ lt.delete_min_insert_stable(*seqs_begin[source].first, false);
- }
+ }
}
else
{
- RandomAccessIterator3 target_end = target + length;
- while (target < target_end)
- {
- // Take out.
- source = lt.get_min_source();
+ RandomAccessIterator3 target_end = target + length;
+ while (target < target_end)
+ {
+ // Take out.
+ source = lt.get_min_source();
#if _GLIBCXX_ASSERTIONS
- if (i > 0 && comp(*(seqs_begin[source].first), *(target - 1)))
- printf(" %i %i %i\n", length, i, source);
- _GLIBCXX_PARALLEL_ASSERT(i == 0 || !comp(*(seqs_begin[source].first), *(target - 1)));
+ if (i > 0 && comp(*(seqs_begin[source].first), *(target - 1)))
+ printf(" %i %i %i\n", length, i, source);
+ _GLIBCXX_PARALLEL_ASSERT(i == 0
+ || !comp(*(seqs_begin[source].first), *(target - 1)));
#endif
- *(target++) = *(seqs_begin[source].first++);
+ *(target++) = *(seqs_begin[source].first++);
#if _GLIBCXX_ASSERTIONS
- if (!((seqs_begin[source].first != seqs_begin[source].second) || (i >= length - 1)))
- printf(" %i %i %i\n", length, i, source);
- _GLIBCXX_PARALLEL_ASSERT((seqs_begin[source].first != seqs_begin[source].second) || (i >= length - 1));
- i++;
+ if (!((seqs_begin[source].first != seqs_begin[source].second)
+ || (i >= length - 1)))
+ printf(" %i %i %i\n", length, i, source);
+ _GLIBCXX_PARALLEL_ASSERT(
+ (seqs_begin[source].first != seqs_begin[source].second)
+ || (i >= length - 1));
+ i++;
#endif
- // Feed.
- // Replace from same source.
- lt.delete_min_insert(*seqs_begin[source].first, false);
- }
+ // Feed.
+ // Replace from same source.
+ lt.delete_min_insert(*seqs_begin[source].first, false);
+ }
}
return target;
}
- template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
+template<
+ typename RandomAccessIteratorIterator,
+ typename RandomAccessIterator3,
+ typename _DifferenceTp,
+ typename Comparator>
RandomAccessIterator3
- multiway_merge_loser_tree_combined(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
+ multiway_merge_loser_tree_combined(RandomAccessIteratorIterator seqs_begin,
+ RandomAccessIteratorIterator seqs_end,
+ RandomAccessIterator3 target,
+ Comparator comp,
+ _DifferenceTp length, bool stable)
{
_GLIBCXX_CALL(length)
typedef _DifferenceTp difference_type;
- typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
+ typedef typename std::iterator_traits<RandomAccessIteratorIterator>
+ ::value_type::first_type
RandomAccessIterator1;
typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
value_type;
@@ -1107,7 +1213,7 @@ namespace __gnu_parallel
int min_seq;
RandomAccessIterator3 target_end;
difference_type overhang = prepare_unguarded(seqs_begin, seqs_end,
- comp, min_seq, stable);
+ comp, min_seq, stable);
difference_type total_length = 0;
for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++)
@@ -1115,17 +1221,18 @@ namespace __gnu_parallel
if (overhang != -1)
{
- difference_type unguarded_length = std::min(length, total_length - overhang);
- target_end = multiway_merge_loser_tree_unguarded
- <typename loser_tree_unguarded_traits<value_type, Comparator>::LT>
- (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
- overhang = length - unguarded_length;
+ difference_type unguarded_length =
+ std::min(length, total_length - overhang);
+ target_end = multiway_merge_loser_tree_unguarded
+ <typename loser_tree_unguarded_traits<value_type, Comparator>::LT>
+ (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
+ overhang = length - unguarded_length;
}
else
{
- // Empty sequence found.
- overhang = length;
- target_end = target;
+ // Empty sequence found.
+ overhang = length;
+ target_end = target;
}
#if _GLIBCXX_ASSERTIONS
@@ -1145,34 +1252,43 @@ namespace __gnu_parallel
return target_end;
}
- template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
+template<
+ typename RandomAccessIteratorIterator,
+ typename RandomAccessIterator3,
+ typename _DifferenceTp,
+ typename Comparator>
RandomAccessIterator3
- multiway_merge_loser_tree_sentinel(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
+ multiway_merge_loser_tree_sentinel(RandomAccessIteratorIterator seqs_begin,
+ RandomAccessIteratorIterator seqs_end,
+ RandomAccessIterator3 target,
+ Comparator comp,
+ _DifferenceTp length, bool stable)
{
_GLIBCXX_CALL(length)
typedef _DifferenceTp difference_type;
typedef std::iterator_traits<RandomAccessIteratorIterator> traits_type;
- typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
+ typedef typename std::iterator_traits<RandomAccessIteratorIterator>
+ ::value_type::first_type
RandomAccessIterator1;
typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
value_type;
- typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
- RandomAccessIterator1;
RandomAccessIterator3 target_end;
- difference_type overhang = prepare_unguarded_sentinel(seqs_begin, seqs_end, comp);
+ difference_type overhang =
+ prepare_unguarded_sentinel(seqs_begin, seqs_end, comp);
difference_type total_length = 0;
for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++)
{
- total_length += LENGTH(*s);
+ total_length += LENGTH(*s);
- // Sentinel spot.
- (*s).second++;
+ // Sentinel spot.
+ (*s).second++;
}
- difference_type unguarded_length = std::min(length, total_length - overhang);
+ difference_type unguarded_length =
+ std::min(length, total_length - overhang);
target_end = multiway_merge_loser_tree_unguarded
<typename loser_tree_unguarded_traits<value_type, Comparator>::LT>
(seqs_begin, seqs_end, target, comp, unguarded_length, stable);
@@ -1184,14 +1300,17 @@ namespace __gnu_parallel
#endif
// Copy rest stable.
- for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end && overhang > 0; s++)
+ for (RandomAccessIteratorIterator s = seqs_begin;
+ s != seqs_end && overhang > 0; s++)
{
- // Restore.
- (*s).second--;
- difference_type local_length = std::min((difference_type)overhang, (difference_type)LENGTH(*s));
- target_end = std::copy((*s).first, (*s).first + local_length, target_end);
- (*s).first += local_length;
- overhang -= local_length;
+ // Restore.
+ (*s).second--;
+ difference_type local_length =
+ std::min<difference_type>(overhang, LENGTH(*s));
+ target_end = std::copy((*s).first, (*s).first + local_length,
+ target_end);
+ (*s).first += local_length;
+ overhang -= local_length;
}
#if _GLIBCXX_ASSERTIONS
@@ -1203,25 +1322,35 @@ namespace __gnu_parallel
return target_end;
}
- /** @brief Sequential multi-way merging switch.
- *
- * The decision if based on the branching factor and runtime settings.
- * @param seqs_begin Begin iterator of iterator pair input sequence.
- * @param seqs_end End iterator of iterator pair input sequence.
- * @param target Begin iterator out output sequence.
- * @param comp Comparator.
- * @param length Maximum length to merge.
- * @param stable Stable merging incurs a performance penalty.
- * @param sentinel The sequences have a sentinel element.
- * @return End iterator of output sequence. */
- template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
+/** @brief Sequential multi-way merging switch.
+ *
+ * The decision if based on the branching factor and runtime settings.
+ * @param seqs_begin Begin iterator of iterator pair input sequence.
+ * @param seqs_end End iterator of iterator pair input sequence.
+ * @param target Begin iterator out output sequence.
+ * @param comp Comparator.
+ * @param length Maximum length to merge.
+ * @param stable Stable merging incurs a performance penalty.
+ * @param sentinel The sequences have a sentinel element.
+ * @return End iterator of output sequence. */
+template<
+ typename RandomAccessIteratorIterator,
+ typename RandomAccessIterator3,
+ typename _DifferenceTp,
+ typename Comparator>
RandomAccessIterator3
- multiway_merge(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable, bool sentinel, sequential_tag)
+ multiway_merge(RandomAccessIteratorIterator seqs_begin,
+ RandomAccessIteratorIterator seqs_end,
+ RandomAccessIterator3 target,
+ Comparator comp, _DifferenceTp length,
+ bool stable, bool sentinel,
+ sequential_tag)
{
_GLIBCXX_CALL(length)
typedef _DifferenceTp difference_type;
- typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
+ typedef typename std::iterator_traits<RandomAccessIteratorIterator>
+ ::value_type::first_type
RandomAccessIterator1;
typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
value_type;
@@ -1234,7 +1363,8 @@ namespace __gnu_parallel
RandomAccessIterator3 return_target = target;
int k = static_cast<int>(seqs_end - seqs_begin);
- Settings::MultiwayMergeAlgorithm mwma = Settings::multiway_merge_algorithm;
+ Settings::MultiwayMergeAlgorithm mwma =
+ Settings::multiway_merge_algorithm;
if (!sentinel && mwma == Settings::LOSER_TREE_SENTINEL)
mwma = Settings::LOSER_TREE_COMBINED;
@@ -1242,75 +1372,126 @@ namespace __gnu_parallel
switch (k)
{
case 0:
- break;
+ break;
case 1:
- return_target = std::copy(seqs_begin[0].first, seqs_begin[0].first + length, target);
- seqs_begin[0].first += length;
- break;
+ return_target = std::copy(seqs_begin[0].first,
+ seqs_begin[0].first + length,
+ target);
+ seqs_begin[0].first += length;
+ break;
case 2:
- return_target = merge_advance(seqs_begin[0].first, seqs_begin[0].second, seqs_begin[1].first, seqs_begin[1].second, target, length, comp);
- break;
+ return_target = merge_advance(seqs_begin[0].first,
+ seqs_begin[0].second,
+ seqs_begin[1].first,
+ seqs_begin[1].second,
+ target, length, comp);
+ break;
case 3:
- switch (mwma)
- {
- case Settings::LOSER_TREE_COMBINED:
- return_target = multiway_merge_3_combined(seqs_begin, seqs_end, target, comp, length, stable);
- break;
- case Settings::LOSER_TREE_SENTINEL:
- return_target = multiway_merge_3_variant<unguarded_iterator>(seqs_begin, seqs_end, target, comp, length, stable);
- break;
- default:
- return_target = multiway_merge_3_variant<guarded_iterator>(seqs_begin, seqs_end, target, comp, length, stable);
- break;
- }
- break;
+ switch (mwma)
+ {
+ case Settings::LOSER_TREE_COMBINED:
+ return_target = multiway_merge_3_combined(seqs_begin,
+ seqs_end,
+ target,
+ comp, length, stable);
+ break;
+ case Settings::LOSER_TREE_SENTINEL:
+ return_target = multiway_merge_3_variant<unguarded_iterator>(
+ seqs_begin,
+ seqs_end,
+ target,
+ comp, length, stable);
+ break;
+ default:
+ return_target = multiway_merge_3_variant<guarded_iterator>(
+ seqs_begin,
+ seqs_end,
+ target,
+ comp, length, stable);
+ break;
+ }
+ break;
case 4:
- switch (mwma)
- {
- case Settings::LOSER_TREE_COMBINED:
- return_target = multiway_merge_4_combined(seqs_begin, seqs_end, target, comp, length, stable);
- break;
- case Settings::LOSER_TREE_SENTINEL:
- return_target = multiway_merge_4_variant<unguarded_iterator>(seqs_begin, seqs_end, target, comp, length, stable);
- break;
- default:
- return_target = multiway_merge_4_variant<guarded_iterator>(seqs_begin, seqs_end, target, comp, length, stable);
- break;
- }
- break;
+ switch (mwma)
+ {
+ case Settings::LOSER_TREE_COMBINED:
+ return_target = multiway_merge_4_combined(
+ seqs_begin,
+ seqs_end,
+ target,
+ comp, length, stable);
+ break;
+ case Settings::LOSER_TREE_SENTINEL:
+ return_target = multiway_merge_4_variant<unguarded_iterator>(
+ seqs_begin,
+ seqs_end,
+ target,
+ comp, length, stable);
+ break;
+ default:
+ return_target = multiway_merge_4_variant<guarded_iterator>(
+ seqs_begin,
+ seqs_end,
+ target,
+ comp, length, stable);
+ break;
+ }
+ break;
default:
- {
- switch (mwma)
- {
- case Settings::BUBBLE:
- return_target = multiway_merge_bubble(seqs_begin, seqs_end, target, comp, length, stable);
- break;
+ {
+ switch (mwma)
+ {
+ case Settings::BUBBLE:
+ return_target = multiway_merge_bubble(
+ seqs_begin,
+ seqs_end,
+ target,
+ comp, length, stable);
+ break;
#if _GLIBCXX_LOSER_TREE_EXPLICIT
- case Settings::LOSER_TREE_EXPLICIT:
- return_target = multiway_merge_loser_tree<LoserTreeExplicit<value_type, Comparator> >(seqs_begin, seqs_end, target, comp, length, stable);
- break;
+ case Settings::LOSER_TREE_EXPLICIT:
+ return_target = multiway_merge_loser_tree<
+ LoserTreeExplicit<value_type, Comparator> >(
+ seqs_begin,
+ seqs_end,
+ target,
+ comp, length, stable);
+ break;
#endif
#if _GLIBCXX_LOSER_TREE
- case Settings::LOSER_TREE:
- return_target = multiway_merge_loser_tree<LoserTree<value_type, Comparator> >(seqs_begin, seqs_end, target, comp, length, stable);
- break;
+ case Settings::LOSER_TREE:
+ return_target = multiway_merge_loser_tree<
+ LoserTree<value_type, Comparator> >(
+ seqs_begin,
+ seqs_end,
+ target,
+ comp, length, stable);
+ break;
#endif
#if _GLIBCXX_LOSER_TREE_COMBINED
- case Settings::LOSER_TREE_COMBINED:
- return_target = multiway_merge_loser_tree_combined(seqs_begin, seqs_end, target, comp, length, stable);
- break;
+ case Settings::LOSER_TREE_COMBINED:
+ return_target = multiway_merge_loser_tree_combined(
+ seqs_begin,
+ seqs_end,
+ target,
+ comp, length, stable);
+ break;
#endif
#if _GLIBCXX_LOSER_TREE_SENTINEL
- case Settings::LOSER_TREE_SENTINEL:
- return_target = multiway_merge_loser_tree_sentinel(seqs_begin, seqs_end, target, comp, length, stable);
- break;
+ case Settings::LOSER_TREE_SENTINEL:
+ return_target = multiway_merge_loser_tree_sentinel(
+ seqs_begin,
+ seqs_end,
+ target,
+ comp, length, stable);
+ break;
#endif
- default:
- // multiway_merge algorithm not implemented.
- _GLIBCXX_PARALLEL_ASSERT(0);
- break;
- }
- }
+ default:
+ // multiway_merge algorithm not implemented.
+ _GLIBCXX_PARALLEL_ASSERT(0);
+ break;
+ }
+ }
}
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target + length, comp));
@@ -1319,214 +1500,268 @@ namespace __gnu_parallel
return return_target;
}
- /** @brief Parallel multi-way merge routine.
- *
- * The decision if based on the branching factor and runtime settings.
- * @param seqs_begin Begin iterator of iterator pair input sequence.
- * @param seqs_end End iterator of iterator pair input sequence.
- * @param target Begin iterator out output sequence.
- * @param comp Comparator.
- * @param length Maximum length to merge.
- * @param stable Stable merging incurs a performance penalty.
- * @param sentinel Ignored.
- * @return End iterator of output sequence.
- */
- template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
+/** @brief Parallel multi-way merge routine.
+ *
+ * The decision if based on the branching factor and runtime settings.
+ * @param seqs_begin Begin iterator of iterator pair input sequence.
+ * @param seqs_end End iterator of iterator pair input sequence.
+ * @param target Begin iterator out output sequence.
+ * @param comp Comparator.
+ * @param length Maximum length to merge.
+ * @param stable Stable merging incurs a performance penalty.
+ * @param sentinel Ignored.
+ * @return End iterator of output sequence.
+ */
+template<
+ typename RandomAccessIteratorIterator,
+ typename RandomAccessIterator3,
+ typename _DifferenceTp,
+ typename Comparator>
RandomAccessIterator3
- parallel_multiway_merge(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable, bool sentinel)
- {
- _GLIBCXX_CALL(length)
-
- typedef _DifferenceTp difference_type;
- typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
- RandomAccessIterator1;
- typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
- value_type;
-
-#if _GLIBCXX_ASSERTIONS
- for (RandomAccessIteratorIterator rii = seqs_begin; rii != seqs_end; rii++)
- _GLIBCXX_PARALLEL_ASSERT(is_sorted((*rii).first, (*rii).second, comp));
-#endif
-
- // k sequences.
- int k = static_cast<int>(seqs_end - seqs_begin);
-
- difference_type total_length = 0;
- for (RandomAccessIteratorIterator raii = seqs_begin; raii != seqs_end; raii++)
- total_length += LENGTH(*raii);
-
- _GLIBCXX_CALL(total_length)
-
- if (total_length == 0 || k == 0)
- return target;
-
- thread_index_t num_threads = static_cast<thread_index_t>(std::min(static_cast<difference_type>(get_max_threads()), total_length));
-
- bool tight = (total_length == length);
-
- // Thread t will have to merge pieces[iam][0..k - 1]
- std::vector<std::pair<difference_type, difference_type> >* pieces = new std::vector<std::pair<difference_type, difference_type> >[num_threads];
- for (int s = 0; s < num_threads; s++)
- pieces[s].resize(k);
-
- difference_type num_samples = Settings::merge_oversampling * num_threads;
-
- if (Settings::multiway_merge_splitting == Settings::SAMPLING)
- {
- value_type* samples = static_cast<value_type*>(::operator new(sizeof(value_type) * k * num_samples));
- // Sample.
- for (int s = 0; s < k; s++)
- for (int i = 0; (difference_type)i < num_samples; i++)
- {
- difference_type sample_index = static_cast<difference_type>(LENGTH(seqs_begin[s]) * (double(i + 1) / (num_samples + 1)) * (double(length) / total_length));
- samples[s * num_samples + i] = seqs_begin[s].first[sample_index];
- }
-
- if (stable)
- __gnu_sequential::stable_sort(samples, samples + (num_samples * k), comp);
- else
- __gnu_sequential::sort(samples, samples + (num_samples * k), comp);
-
- for (int slab = 0; slab < num_threads; slab++)
- // For each slab / processor.
- for (int seq = 0; seq < k; seq++)
- {
- // For each sequence.
- if (slab > 0)
- pieces[slab][seq].first = std::upper_bound(seqs_begin[seq].first, seqs_begin[seq].second, samples[num_samples * k * slab / num_threads], comp) - seqs_begin[seq].first;
- else
- {
- // Absolute beginning.
- pieces[slab][seq].first = 0;
- }
- if ((slab + 1) < num_threads)
- pieces[slab][seq].second = std::upper_bound(seqs_begin[seq].first, seqs_begin[seq].second, samples[num_samples * k * (slab + 1) / num_threads], comp) - seqs_begin[seq].first;
- else
- pieces[slab][seq].second = LENGTH(seqs_begin[seq]); //absolute ending
- }
- delete[] samples;
- }
- else
- {
- // (Settings::multiway_merge_splitting == Settings::EXACT).
- std::vector<RandomAccessIterator1>* offsets = new std::vector<RandomAccessIterator1>[num_threads];
- std::vector<std::pair<RandomAccessIterator1, RandomAccessIterator1> > se(k);
-
- copy(seqs_begin, seqs_end, se.begin());
-
- difference_type* borders = static_cast<difference_type*>(__builtin_alloca(sizeof(difference_type) * (num_threads + 1)));
- equally_split(length, num_threads, borders);
-
- for (int s = 0; s < (num_threads - 1); s++)
- {
- offsets[s].resize(k);
- multiseq_partition(se.begin(), se.end(), borders[s + 1],
- offsets[s].begin(), comp);
-
- // Last one also needed and available.
- if (!tight)
- {
- offsets[num_threads - 1].resize(k);
- multiseq_partition(se.begin(), se.end(),
- difference_type(length),
- offsets[num_threads - 1].begin(), comp);
- }
- }
-
-
- for (int slab = 0; slab < num_threads; slab++)
- {
- // For each slab / processor.
- for (int seq = 0; seq < k; seq++)
- {
- // For each sequence.
- if (slab == 0)
- {
- // Absolute beginning.
- pieces[slab][seq].first = 0;
- }
- else
- pieces[slab][seq].first = pieces[slab - 1][seq].second;
- if (!tight || slab < (num_threads - 1))
- pieces[slab][seq].second = offsets[slab][seq] - seqs_begin[seq].first;
- else
- {
- // slab == num_threads - 1
- pieces[slab][seq].second = LENGTH(seqs_begin[seq]);
- }
- }
- }
- delete[] offsets;
- }
-
-# pragma omp parallel num_threads(num_threads)
+ parallel_multiway_merge(RandomAccessIteratorIterator seqs_begin,
+ RandomAccessIteratorIterator seqs_end,
+ RandomAccessIterator3 target,
+ Comparator comp,
+ _DifferenceTp length, bool stable, bool sentinel)
{
- thread_index_t iam = omp_get_thread_num();
-
- difference_type target_position = 0;
-
- for (int c = 0; c < k; c++)
- target_position += pieces[iam][c].first;
-
- if (k > 2)
- {
- std::pair<RandomAccessIterator1, RandomAccessIterator1>* chunks = new std::pair<RandomAccessIterator1, RandomAccessIterator1>[k];
-
- difference_type local_length = 0;
- for (int s = 0; s < k; s++)
- {
- chunks[s] = std::make_pair(seqs_begin[s].first + pieces[iam][s].first, seqs_begin[s].first + pieces[iam][s].second);
- local_length += LENGTH(chunks[s]);
- }
-
- multiway_merge(chunks, chunks + k, target + target_position, comp,
- std::min(local_length, length - target_position),
- stable, false, sequential_tag());
-
- delete[] chunks;
- }
- else if (k == 2)
- {
- RandomAccessIterator1 begin0 = seqs_begin[0].first + pieces[iam][0].first, begin1 = seqs_begin[1].first + pieces[iam][1].first;
- merge_advance(begin0,
- seqs_begin[0].first + pieces[iam][0].second,
- begin1,
- seqs_begin[1].first + pieces[iam][1].second,
- target + target_position,
- (pieces[iam][0].second - pieces[iam][0].first) + (pieces[iam][1].second - pieces[iam][1].first),
- comp);
- }
- }
+ _GLIBCXX_CALL(length)
+
+ typedef _DifferenceTp difference_type;
+ typedef typename std::iterator_traits<RandomAccessIteratorIterator>
+ ::value_type::first_type
+ RandomAccessIterator1;
+ typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
+ value_type;
+
+ // k sequences.
+ int k = static_cast<int>(seqs_end - seqs_begin);
+
+ difference_type total_length = 0;
+ for (RandomAccessIteratorIterator raii = seqs_begin;
+ raii != seqs_end; raii++)
+ total_length += LENGTH(*raii);
+
+ _GLIBCXX_CALL(total_length)
+
+ if (total_length == 0 || k == 0)
+ return target;
+
+ bool tight = (total_length == length);
+
+ std::vector<std::pair<difference_type, difference_type> >* pieces;
+
+ thread_index_t num_threads = static_cast<thread_index_t>(
+ std::min<difference_type>(get_max_threads(), total_length));
+
+# pragma omp parallel num_threads (num_threads)
+ {
+# pragma omp single
+ {
+ num_threads = omp_get_num_threads();
+ // Thread t will have to merge pieces[iam][0..k - 1]
+ pieces = new std::vector<
+ std::pair<difference_type, difference_type> >[num_threads];
+ for (int s = 0; s < num_threads; s++)
+ pieces[s].resize(k);
+
+ difference_type num_samples =
+ Settings::merge_oversampling * num_threads;
+
+ if (Settings::multiway_merge_splitting == Settings::SAMPLING)
+ {
+ value_type* samples = static_cast<value_type*>(
+ ::operator new(sizeof(value_type) * k * num_samples));
+ // Sample.
+ for (int s = 0; s < k; s++)
+ for (int i = 0; (difference_type)i < num_samples; i++)
+ {
+ difference_type sample_index =
+ static_cast<difference_type>(
+ LENGTH(seqs_begin[s]) * (double(i + 1) /
+ (num_samples + 1)) * (double(length)
+ / total_length));
+ samples[s * num_samples + i] =
+ seqs_begin[s].first[sample_index];
+ }
+
+ if (stable)
+ __gnu_sequential::stable_sort(
+ samples, samples + (num_samples * k), comp);
+ else
+ __gnu_sequential::sort(
+ samples, samples + (num_samples * k), comp);
+
+ for (int slab = 0; slab < num_threads; slab++)
+ // For each slab / processor.
+ for (int seq = 0; seq < k; seq++)
+ {
+ // For each sequence.
+ if (slab > 0)
+ pieces[slab][seq].first =
+ std::upper_bound(
+ seqs_begin[seq].first,
+ seqs_begin[seq].second,
+ samples[num_samples * k * slab / num_threads],
+ comp)
+ - seqs_begin[seq].first;
+ else
+ {
+ // Absolute beginning.
+ pieces[slab][seq].first = 0;
+ }
+ if ((slab + 1) < num_threads)
+ pieces[slab][seq].second =
+ std::upper_bound(
+ seqs_begin[seq].first,
+ seqs_begin[seq].second,
+ samples[num_samples * k * (slab + 1) /
+ num_threads], comp)
+ - seqs_begin[seq].first;
+ else
+ pieces[slab][seq].second = LENGTH(seqs_begin[seq]);
+ }
+ delete[] samples;
+ }
+ else
+ {
+ // (Settings::multiway_merge_splitting == Settings::EXACT).
+ std::vector<RandomAccessIterator1>* offsets =
+ new std::vector<RandomAccessIterator1>[num_threads];
+ std::vector<
+ std::pair<RandomAccessIterator1, RandomAccessIterator1>
+ > se(k);
+
+ copy(seqs_begin, seqs_end, se.begin());
+
+ difference_type* borders =
+ new difference_type[num_threads + 1];
+ equally_split(length, num_threads, borders);
+
+ for (int s = 0; s < (num_threads - 1); s++)
+ {
+ offsets[s].resize(k);
+ multiseq_partition(
+ se.begin(), se.end(), borders[s + 1],
+ offsets[s].begin(), comp);
+
+ // Last one also needed and available.
+ if (!tight)
+ {
+ offsets[num_threads - 1].resize(k);
+ multiseq_partition(se.begin(), se.end(),
+ difference_type(length),
+ offsets[num_threads - 1].begin(), comp);
+ }
+ }
+
+
+ for (int slab = 0; slab < num_threads; slab++)
+ {
+ // For each slab / processor.
+ for (int seq = 0; seq < k; seq++)
+ {
+ // For each sequence.
+ if (slab == 0)
+ {
+ // Absolute beginning.
+ pieces[slab][seq].first = 0;
+ }
+ else
+ pieces[slab][seq].first =
+ pieces[slab - 1][seq].second;
+ if (!tight || slab < (num_threads - 1))
+ pieces[slab][seq].second =
+ offsets[slab][seq] - seqs_begin[seq].first;
+ else
+ {
+ // slab == num_threads - 1
+ pieces[slab][seq].second =
+ LENGTH(seqs_begin[seq]);
+ }
+ }
+ }
+ delete[] offsets;
+ }
+ } //single
+
+ thread_index_t iam = omp_get_thread_num();
+
+ difference_type target_position = 0;
+
+ for (int c = 0; c < k; c++)
+ target_position += pieces[iam][c].first;
+
+ if (k > 2)
+ {
+ std::pair<RandomAccessIterator1, RandomAccessIterator1>* chunks
+ = new
+ std::pair<RandomAccessIterator1, RandomAccessIterator1>[k];
+
+ difference_type local_length = 0;
+ for (int s = 0; s < k; s++)
+ {
+ chunks[s] = std::make_pair(
+ seqs_begin[s].first + pieces[iam][s].first,
+ seqs_begin[s].first + pieces[iam][s].second);
+ local_length += LENGTH(chunks[s]);
+ }
+
+ multiway_merge(
+ chunks, chunks + k, target + target_position, comp,
+ std::min(local_length, length - target_position),
+ stable, false, sequential_tag());
+
+ delete[] chunks;
+ }
+ else if (k == 2)
+ {
+ RandomAccessIterator1
+ begin0 = seqs_begin[0].first + pieces[iam][0].first,
+ begin1 = seqs_begin[1].first + pieces[iam][1].first;
+ merge_advance(begin0,
+ seqs_begin[0].first + pieces[iam][0].second,
+ begin1,
+ seqs_begin[1].first + pieces[iam][1].second,
+ target + target_position,
+ (pieces[iam][0].second - pieces[iam][0].first) +
+ (pieces[iam][1].second - pieces[iam][1].first),
+ comp);
+ }
+ } //parallel
#if _GLIBCXX_ASSERTIONS
- _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target + length, comp));
+ _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target + length, comp));
#endif
- // Update ends of sequences.
- for (int s = 0; s < k; s++)
- seqs_begin[s].first += pieces[num_threads - 1][s].second;
+ // Update ends of sequences.
+ for (int s = 0; s < k; s++)
+ seqs_begin[s].first += pieces[num_threads - 1][s].second;
- delete[] pieces;
+ delete[] pieces;
- return target + length;
- }
+ return target + length;
+ }
- /**
- * @brief Multi-way merging front-end.
- * @param seqs_begin Begin iterator of iterator pair input sequence.
- * @param seqs_end End iterator of iterator pair input sequence.
- * @param target Begin iterator out output sequence.
- * @param comp Comparator.
- * @param length Maximum length to merge.
- * @param stable Stable merging incurs a performance penalty.
- * @return End iterator of output sequence.
- */
- template<typename RandomAccessIteratorPairIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
+/**
+ * @brief Multi-way merging front-end.
+ * @param seqs_begin Begin iterator of iterator pair input sequence.
+ * @param seqs_end End iterator of iterator pair input sequence.
+ * @param target Begin iterator out output sequence.
+ * @param comp Comparator.
+ * @param length Maximum length to merge.
+ * @param stable Stable merging incurs a performance penalty.
+ * @return End iterator of output sequence.
+ */
+template<
+ typename RandomAccessIteratorPairIterator,
+ typename RandomAccessIterator3,
+ typename _DifferenceTp,
+ typename Comparator>
RandomAccessIterator3
multiway_merge(RandomAccessIteratorPairIterator seqs_begin,
- RandomAccessIteratorPairIterator seqs_end,
- RandomAccessIterator3 target, Comparator comp,
- _DifferenceTp length, bool stable)
+ RandomAccessIteratorPairIterator seqs_end,
+ RandomAccessIterator3 target, Comparator comp,
+ _DifferenceTp length, bool stable)
{
typedef _DifferenceTp difference_type;
_GLIBCXX_CALL(seqs_end - seqs_begin)
@@ -1535,33 +1770,43 @@ namespace __gnu_parallel
return target;
RandomAccessIterator3 target_end;
- if (_GLIBCXX_PARALLEL_CONDITION(((seqs_end - seqs_begin) >= Settings::multiway_merge_minimal_k) && ((sequence_index_t)length >= Settings::multiway_merge_minimal_n)))
- target_end = parallel_multiway_merge(seqs_begin, seqs_end, target, comp, (difference_type)length, stable, false);
+ if (_GLIBCXX_PARALLEL_CONDITION(
+ ((seqs_end - seqs_begin) >= Settings::multiway_merge_minimal_k)
+ && ((sequence_index_t)length >= Settings::multiway_merge_minimal_n)))
+ target_end = parallel_multiway_merge(
+ seqs_begin, seqs_end,
+ target, comp, static_cast<difference_type>(length), stable, false);
else
- target_end = multiway_merge(seqs_begin, seqs_end, target, comp, length, stable, false, sequential_tag());
+ target_end = multiway_merge(
+ seqs_begin, seqs_end,
+ target, comp, length, stable, false, sequential_tag());
return target_end;
}
- /** @brief Multi-way merging front-end.
- * @param seqs_begin Begin iterator of iterator pair input sequence.
- * @param seqs_end End iterator of iterator pair input sequence.
- * @param target Begin iterator out output sequence.
- * @param comp Comparator.
- * @param length Maximum length to merge.
- * @param stable Stable merging incurs a performance penalty.
- * @return End iterator of output sequence.
- * @pre For each @c i, @c seqs_begin[i].second must be the end
- * marker of the sequence, but also reference the one more sentinel
- * element. */
- template<typename RandomAccessIteratorPairIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
+/** @brief Multi-way merging front-end.
+ * @param seqs_begin Begin iterator of iterator pair input sequence.
+ * @param seqs_end End iterator of iterator pair input sequence.
+ * @param target Begin iterator out output sequence.
+ * @param comp Comparator.
+ * @param length Maximum length to merge.
+ * @param stable Stable merging incurs a performance penalty.
+ * @return End iterator of output sequence.
+ * @pre For each @c i, @c seqs_begin[i].second must be the end
+ * marker of the sequence, but also reference the one more sentinel
+ * element. */
+template<
+ typename RandomAccessIteratorPairIterator,
+ typename RandomAccessIterator3,
+ typename _DifferenceTp,
+ typename Comparator>
RandomAccessIterator3
multiway_merge_sentinel(RandomAccessIteratorPairIterator seqs_begin,
- RandomAccessIteratorPairIterator seqs_end,
- RandomAccessIterator3 target,
- Comparator comp,
- _DifferenceTp length,
- bool stable)
+ RandomAccessIteratorPairIterator seqs_end,
+ RandomAccessIterator3 target,
+ Comparator comp,
+ _DifferenceTp length,
+ bool stable)
{
typedef _DifferenceTp difference_type;
@@ -1570,10 +1815,16 @@ namespace __gnu_parallel
_GLIBCXX_CALL(seqs_end - seqs_begin)
- if (_GLIBCXX_PARALLEL_CONDITION(((seqs_end - seqs_begin) >= Settings::multiway_merge_minimal_k) && ((sequence_index_t)length >= Settings::multiway_merge_minimal_n)))
- return parallel_multiway_merge(seqs_begin, seqs_end, target, comp, (typename std::iterator_traits<RandomAccessIterator3>::difference_type)length, stable, true);
+ if (_GLIBCXX_PARALLEL_CONDITION(
+ ((seqs_end - seqs_begin) >= Settings::multiway_merge_minimal_k)
+ && ((sequence_index_t)length >= Settings::multiway_merge_minimal_n)))
+ return parallel_multiway_merge(
+ seqs_begin, seqs_end,
+ target, comp, static_cast<difference_type>(length), stable, true);
else
- return multiway_merge(seqs_begin, seqs_end, target, comp, length, stable, true, sequential_tag());
+ return multiway_merge(
+ seqs_begin, seqs_end,
+ target, comp, length, stable, true, sequential_tag());
}
}
diff --git a/libstdc++-v3/include/parallel/multiway_mergesort.h b/libstdc++-v3/include/parallel/multiway_mergesort.h
index 89285e1..5dedd00 100644
--- a/libstdc++-v3/include/parallel/multiway_mergesort.h
+++ b/libstdc++-v3/include/parallel/multiway_mergesort.h
@@ -48,8 +48,8 @@
namespace __gnu_parallel
{
- /** @brief Subsequence description. */
- template<typename _DifferenceTp>
+/** @brief Subsequence description. */
+template<typename _DifferenceTp>
struct Piece
{
typedef _DifferenceTp difference_type;
@@ -61,16 +61,19 @@ namespace __gnu_parallel
difference_type end;
};
- /** @brief Data accessed by all threads.
- *
- * PMWMS = parallel multiway mergesort */
- template<typename RandomAccessIterator>
+/** @brief Data accessed by all threads.
+ *
+ * PMWMS = parallel multiway mergesort */
+template<typename RandomAccessIterator>
struct PMWMSSortingData
{
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::value_type value_type;
typedef typename traits_type::difference_type difference_type;
+ /** @brief Number of threads involved. */
+ thread_index_t num_threads;
+
/** @brief Input begin. */
RandomAccessIterator source;
@@ -105,62 +108,55 @@ namespace __gnu_parallel
/** @brief Pieces of data to merge @c [thread][sequence] */
std::vector<Piece<difference_type> >* pieces;
- };
- /** @brief Thread local data for PMWMS. */
- template<typename RandomAccessIterator>
- struct PMWMSSorterPU
- {
- /** @brief Total number of thread involved. */
- thread_index_t num_threads;
- /** @brief Number of owning thread. */
- thread_index_t iam;
/** @brief Stable sorting desired. */
bool stable;
- /** @brief Pointer to global data. */
- PMWMSSortingData<RandomAccessIterator>* sd;
- };
-
- /**
- * @brief Select samples from a sequence.
- * @param d Pointer to thread-local data. Result will be placed in
- * @c d->ds->samples.
- * @param num_samples Number of samples to select.
- */
- template<typename RandomAccessIterator, typename _DifferenceTp>
+};
+
+/**
+ * @brief Select samples from a sequence.
+ * @param sd Pointer to algorithm data. Result will be placed in
+ * @c sd->samples.
+ * @param num_samples Number of samples to select.
+ */
+template<typename RandomAccessIterator, typename _DifferenceTp>
inline void
- determine_samples(PMWMSSorterPU<RandomAccessIterator>* d,
- _DifferenceTp& num_samples)
+ determine_samples(PMWMSSortingData<RandomAccessIterator>* sd,
+ _DifferenceTp& num_samples)
{
typedef _DifferenceTp difference_type;
- PMWMSSortingData<RandomAccessIterator>* sd = d->sd;
+ thread_index_t iam = omp_get_thread_num();
- num_samples = Settings::sort_mwms_oversampling * d->num_threads - 1;
+ num_samples =
+ Settings::sort_mwms_oversampling * sd->num_threads - 1;
- difference_type* es = static_cast<difference_type*>(__builtin_alloca(sizeof(difference_type) * (num_samples + 2)));
+ difference_type* es = new difference_type[num_samples + 2];
- equally_split(sd->starts[d->iam + 1] - sd->starts[d->iam], num_samples + 1, es);
+ equally_split(sd->starts[iam + 1] - sd->starts[iam],
+ num_samples + 1, es);
for (difference_type i = 0; i < num_samples; i++)
- sd->samples[d->iam * num_samples + i] = sd->source[sd->starts[d->iam] + es[i + 1]];
+ sd->samples[iam * num_samples + i] =
+ sd->source[sd->starts[iam] + es[i + 1]];
+
+ delete[] es;
}
- /** @brief PMWMS code executed by each thread.
- * @param d Pointer to thread-local data.
- * @param comp Comparator.
- */
- template<typename RandomAccessIterator, typename Comparator>
+/** @brief PMWMS code executed by each thread.
+ * @param sd Pointer to algorithm data.
+ * @param comp Comparator.
+ */
+template<typename RandomAccessIterator, typename Comparator>
inline void
- parallel_sort_mwms_pu(PMWMSSorterPU<RandomAccessIterator>* d,
- Comparator& comp)
+ parallel_sort_mwms_pu(PMWMSSortingData<RandomAccessIterator>* sd,
+ Comparator& comp)
{
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::value_type value_type;
typedef typename traits_type::difference_type difference_type;
- PMWMSSortingData<RandomAccessIterator>* sd = d->sd;
- thread_index_t iam = d->iam;
+ thread_index_t iam = omp_get_thread_num();
// Length of this thread's chunk, before merging.
difference_type length_local = sd->starts[iam + 1] - sd->starts[iam];
@@ -174,161 +170,168 @@ namespace __gnu_parallel
typedef value_type* SortingPlacesIterator;
// Sort in temporary storage, leave space for sentinel.
- sd->sorting_places[iam] = sd->temporaries[iam] = static_cast<value_type*>(::operator new(sizeof(value_type) * (length_local + 1)));
+ sd->sorting_places[iam] = sd->temporaries[iam] =
+ static_cast<value_type*>(
+ ::operator new(sizeof(value_type) * (length_local + 1)));
// Copy there.
- std::uninitialized_copy(sd->source + sd->starts[iam], sd->source + sd->starts[iam] + length_local, sd->sorting_places[iam]);
+ std::uninitialized_copy(sd->source + sd->starts[iam],
+ sd->source + sd->starts[iam] + length_local,
+ sd->sorting_places[iam]);
#endif
// Sort locally.
- if (d->stable)
- __gnu_sequential::stable_sort(sd->sorting_places[iam], sd->sorting_places[iam] + length_local, comp);
+ if (sd->stable)
+ __gnu_sequential::stable_sort(sd->sorting_places[iam],
+ sd->sorting_places[iam] + length_local,
+ comp);
else
- __gnu_sequential::sort(sd->sorting_places[iam], sd->sorting_places[iam] + length_local, comp);
-
-#if _GLIBCXX_ASSERTIONS
- _GLIBCXX_PARALLEL_ASSERT(is_sorted(sd->sorting_places[iam], sd->sorting_places[iam] + length_local, comp));
-#endif
+ __gnu_sequential::sort(sd->sorting_places[iam],
+ sd->sorting_places[iam] + length_local,
+ comp);
// Invariant: locally sorted subsequence in sd->sorting_places[iam],
// sd->sorting_places[iam] + length_local.
if (Settings::sort_splitting == Settings::SAMPLING)
{
- difference_type num_samples;
- determine_samples(d, num_samples);
-
-#pragma omp barrier
-
-#pragma omp single
- __gnu_sequential::sort(sd->samples,
- sd->samples + (num_samples * d->num_threads),
- comp);
-
-#pragma omp barrier
-
- for (int s = 0; s < d->num_threads; s++)
- {
- // For each sequence.
- if (num_samples * iam > 0)
- sd->pieces[iam][s].begin = std::lower_bound(sd->sorting_places[s],
- sd->sorting_places[s] + sd->starts[s + 1] - sd->starts[s],
- sd->samples[num_samples * iam],
- comp)
- - sd->sorting_places[s];
- else
- // Absolute beginning.
- sd->pieces[iam][s].begin = 0;
-
- if ((num_samples * (iam + 1)) < (num_samples * d->num_threads))
- sd->pieces[iam][s].end = std::lower_bound(sd->sorting_places[s],
- sd->sorting_places[s] + sd->starts[s + 1] - sd->starts[s], sd->samples[num_samples * (iam + 1)], comp)
- - sd->sorting_places[s];
- else
- // Absolute end.
- sd->pieces[iam][s].end = sd->starts[s + 1] - sd->starts[s];
- }
-
+ difference_type num_samples;
+ determine_samples(sd, num_samples);
+
+# pragma omp barrier
+
+# pragma omp single
+ __gnu_sequential::sort(sd->samples,
+ sd->samples + (num_samples * sd->num_threads),
+ comp);
+
+# pragma omp barrier
+
+ for (int s = 0; s < sd->num_threads; s++)
+ {
+ // For each sequence.
+ if (num_samples * iam > 0)
+ sd->pieces[iam][s].begin =
+ std::lower_bound(sd->sorting_places[s],
+ sd->sorting_places[s] + sd->starts[s + 1] - sd->starts[s],
+ sd->samples[num_samples * iam],
+ comp)
+ - sd->sorting_places[s];
+ else
+ // Absolute beginning.
+ sd->pieces[iam][s].begin = 0;
+
+ if ((num_samples * (iam + 1)) < (num_samples * sd->num_threads))
+ sd->pieces[iam][s].end =
+ std::lower_bound(sd->sorting_places[s],
+ sd->sorting_places[s] + sd->starts[s + 1] - sd->starts[s],
+ sd->samples[num_samples * (iam + 1)], comp)
+ - sd->sorting_places[s];
+ else
+ // Absolute end.
+ sd->pieces[iam][s].end = sd->starts[s + 1] - sd->starts[s];
+ }
}
else if (Settings::sort_splitting == Settings::EXACT)
{
-#pragma omp barrier
-
- std::vector<std::pair<SortingPlacesIterator, SortingPlacesIterator> > seqs(d->num_threads);
- for (int s = 0; s < d->num_threads; s++)
- seqs[s] = std::make_pair(sd->sorting_places[s], sd->sorting_places[s] + sd->starts[s + 1] - sd->starts[s]);
-
- std::vector<SortingPlacesIterator> offsets(d->num_threads);
-
- // If not last thread.
- if (iam < d->num_threads - 1)
- multiseq_partition(seqs.begin(), seqs.end(), sd->starts[iam + 1], offsets.begin(), comp);
-
- for (int seq = 0; seq < d->num_threads; seq++)
- {
- // For each sequence.
- if (iam < (d->num_threads - 1))
- sd->pieces[iam][seq].end = offsets[seq] - seqs[seq].first;
- else
- // Absolute end of this sequence.
- sd->pieces[iam][seq].end = sd->starts[seq + 1] - sd->starts[seq];
- }
-
-#pragma omp barrier
-
- for (int seq = 0; seq < d->num_threads; seq++)
- {
- // For each sequence.
- if (iam > 0)
- sd->pieces[iam][seq].begin = sd->pieces[iam - 1][seq].end;
- else
- // Absolute beginning.
- sd->pieces[iam][seq].begin = 0;
- }
+# pragma omp barrier
+
+ std::vector<std::pair<SortingPlacesIterator, SortingPlacesIterator> >
+ seqs(sd->num_threads);
+ for (int s = 0; s < sd->num_threads; s++)
+ seqs[s] = std::make_pair(sd->sorting_places[s],
+ sd->sorting_places[s] + sd->starts[s + 1] - sd->starts[s]);
+
+ std::vector<SortingPlacesIterator> offsets(sd->num_threads);
+
+ // if not last thread
+ if (iam < sd->num_threads - 1)
+ multiseq_partition(seqs.begin(), seqs.end(),
+ sd->starts[iam + 1], offsets.begin(), comp);
+
+ for (int seq = 0; seq < sd->num_threads; seq++)
+ {
+ // for each sequence
+ if (iam < (sd->num_threads - 1))
+ sd->pieces[iam][seq].end = offsets[seq] - seqs[seq].first;
+ else
+ // very end of this sequence
+ sd->pieces[iam][seq].end = sd->starts[seq + 1] - sd->starts[seq];
+ }
+
+# pragma omp barrier
+
+ for (int seq = 0; seq < sd->num_threads; seq++)
+ {
+ // For each sequence.
+ if (iam > 0)
+ sd->pieces[iam][seq].begin = sd->pieces[iam - 1][seq].end;
+ else
+ // Absolute beginning.
+ sd->pieces[iam][seq].begin = 0;
+ }
}
// Offset from target begin, length after merging.
difference_type offset = 0, length_am = 0;
- for (int s = 0; s < d->num_threads; s++)
+ for (int s = 0; s < sd->num_threads; s++)
{
- length_am += sd->pieces[iam][s].end - sd->pieces[iam][s].begin;
- offset += sd->pieces[iam][s].begin;
+ length_am += sd->pieces[iam][s].end - sd->pieces[iam][s].begin;
+ offset += sd->pieces[iam][s].begin;
}
#if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST
// Merge to temporary storage, uninitialized creation not possible
// since there is no multiway_merge calling the placement new
// instead of the assignment operator.
- sd->merging_places[iam] = sd->temporaries[iam] = static_cast<value_type*>(::operator new(sizeof(value_type) * length_am));
+ sd->merging_places[iam] = sd->temporaries[iam] =
+ static_cast<value_type*>(
+ ::operator new(sizeof(value_type) * length_am));
#else
// Merge directly to target.
sd->merging_places[iam] = sd->source + offset;
#endif
- std::vector<std::pair<SortingPlacesIterator, SortingPlacesIterator> > seqs(d->num_threads);
+ std::vector<std::pair<SortingPlacesIterator, SortingPlacesIterator> >
+ seqs(sd->num_threads);
- for (int s = 0; s < d->num_threads; s++)
+ for (int s = 0; s < sd->num_threads; s++)
{
- seqs[s] = std::make_pair(sd->sorting_places[s] + sd->pieces[iam][s].begin, sd->sorting_places[s] + sd->pieces[iam][s].end);
-
-#if _GLIBCXX_ASSERTIONS
- _GLIBCXX_PARALLEL_ASSERT(is_sorted(seqs[s].first, seqs[s].second, comp));
-#endif
+ seqs[s] = std::make_pair(sd->sorting_places[s] + sd->pieces[iam][s].begin,
+ sd->sorting_places[s] + sd->pieces[iam][s].end);
}
- multiway_merge(seqs.begin(), seqs.end(), sd->merging_places[iam], comp, length_am, d->stable, false, sequential_tag());
-
-#if _GLIBCXX_ASSERTIONS
- _GLIBCXX_PARALLEL_ASSERT(is_sorted(sd->merging_places[iam], sd->merging_places[iam] + length_am, comp));
-#endif
+ multiway_merge(seqs.begin(), seqs.end(), sd->merging_places[iam], comp, length_am, sd->stable, false, sequential_tag());
-# pragma omp barrier
+# pragma omp barrier
#if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST
// Write back.
- std::copy(sd->merging_places[iam], sd->merging_places[iam] + length_am,
- sd->source + offset);
+ std::copy(sd->merging_places[iam],
+ sd->merging_places[iam] + length_am,
+ sd->source + offset);
#endif
delete[] sd->temporaries[iam];
}
- /** @brief PMWMS main call.
- * @param begin Begin iterator of sequence.
- * @param end End iterator of sequence.
- * @param comp Comparator.
- * @param n Length of sequence.
- * @param num_threads Number of threads to use.
- * @param stable Stable sorting.
- */
- template<typename RandomAccessIterator, typename Comparator>
+/** @brief PMWMS main call.
+ * @param begin Begin iterator of sequence.
+ * @param end End iterator of sequence.
+ * @param comp Comparator.
+ * @param n Length of sequence.
+ * @param num_threads Number of threads to use.
+ * @param stable Stable sorting.
+ */
+template<typename RandomAccessIterator, typename Comparator>
inline void
- parallel_sort_mwms(RandomAccessIterator begin, RandomAccessIterator end,
- Comparator comp,
- typename std::iterator_traits<RandomAccessIterator>::difference_type n,
- int num_threads, bool stable)
+ parallel_sort_mwms(RandomAccessIterator begin, RandomAccessIterator end,
+ Comparator comp,
+ typename std::iterator_traits<RandomAccessIterator>::difference_type n,
+ int num_threads,
+ bool stable)
{
_GLIBCXX_CALL(n)
-
+
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::value_type value_type;
typedef typename traits_type::difference_type difference_type;
@@ -336,75 +339,75 @@ namespace __gnu_parallel
if (n <= 1)
return;
- // At least one element per thread.
+ // at least one element per thread
if (num_threads > n)
num_threads = static_cast<thread_index_t>(n);
+ // shared variables
PMWMSSortingData<RandomAccessIterator> sd;
+ difference_type* starts;
- sd.source = begin;
- sd.temporaries = new value_type*[num_threads];
+# pragma omp parallel num_threads(num_threads)
+ {
+ num_threads = omp_get_num_threads(); //no more threads than requested
+
+# pragma omp single
+ {
+ sd.num_threads = num_threads;
+ sd.source = begin;
+ sd.temporaries = new value_type*[num_threads];
#if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST
- sd.sorting_places = new RandomAccessIterator[num_threads];
- sd.merging_places = new value_type*[num_threads];
+ sd.sorting_places = new RandomAccessIterator[num_threads];
+ sd.merging_places = new value_type*[num_threads];
#else
- sd.sorting_places = new value_type*[num_threads];
- sd.merging_places = new RandomAccessIterator[num_threads];
+ sd.sorting_places = new value_type*[num_threads];
+ sd.merging_places = new RandomAccessIterator[num_threads];
#endif
- if (Settings::sort_splitting == Settings::SAMPLING)
- {
- unsigned int sz = Settings::sort_mwms_oversampling * num_threads - 1;
- sz *= num_threads;
-
- // Equivalent to value_type[sz], without need of default construction.
- sz *= sizeof(value_type);
- sd.samples = static_cast<value_type*>(::operator new(sz));
- }
- else
- sd.samples = NULL;
-
- sd.offsets = new difference_type[num_threads - 1];
- sd.pieces = new std::vector<Piece<difference_type> >[num_threads];
- for (int s = 0; s < num_threads; s++)
- sd.pieces[s].resize(num_threads);
- PMWMSSorterPU<RandomAccessIterator>* pus = new PMWMSSorterPU<RandomAccessIterator>[num_threads];
- difference_type* starts = sd.starts = new difference_type[num_threads + 1];
-
- difference_type chunk_length = n / num_threads;
- difference_type split = n % num_threads;
- difference_type start = 0;
- for (int i = 0; i < num_threads; i++)
- {
- starts[i] = start;
- start += (i < split) ? (chunk_length + 1) : chunk_length;
- pus[i].num_threads = num_threads;
- pus[i].iam = i;
- pus[i].sd = &sd;
- pus[i].stable = stable;
- }
- starts[num_threads] = start;
-
- // Now sort in parallel.
-#pragma omp parallel num_threads(num_threads)
- parallel_sort_mwms_pu(&(pus[omp_get_thread_num()]), comp);
+ if (Settings::sort_splitting == Settings::SAMPLING)
+ {
+ unsigned int size =
+ (Settings::sort_mwms_oversampling * num_threads - 1) * num_threads;
+ sd.samples = static_cast<value_type*>(
+ ::operator new(size * sizeof(value_type)));
+ }
+ else
+ sd.samples = NULL;
+
+ sd.offsets = new difference_type[num_threads - 1];
+ sd.pieces = new std::vector<Piece<difference_type> >[num_threads];
+ for (int s = 0; s < num_threads; s++)
+ sd.pieces[s].resize(num_threads);
+ starts = sd.starts = new difference_type[num_threads + 1];
+ sd.stable = stable;
+
+ difference_type chunk_length = n / num_threads;
+ difference_type split = n % num_threads;
+ difference_type pos = 0;
+ for (int i = 0; i < num_threads; i++)
+ {
+ starts[i] = pos;
+ pos += (i < split) ? (chunk_length + 1) : chunk_length;
+ }
+ starts[num_threads] = pos;
+ }
+
+ // Now sort in parallel.
+ parallel_sort_mwms_pu(&sd, comp);
+ } //parallel
- // XXX sd as RAII
delete[] starts;
delete[] sd.temporaries;
delete[] sd.sorting_places;
delete[] sd.merging_places;
if (Settings::sort_splitting == Settings::SAMPLING)
- delete[] sd.samples;
+ delete[] sd.samples;
delete[] sd.offsets;
delete[] sd.pieces;
-
- delete[] pus;
}
-
-}
+} //namespace __gnu_parallel
#endif
diff --git a/libstdc++-v3/include/parallel/omp_loop.h b/libstdc++-v3/include/parallel/omp_loop.h
index 23fe6f4..0a992b0 100644
--- a/libstdc++-v3/include/parallel/omp_loop.h
+++ b/libstdc++-v3/include/parallel/omp_loop.h
@@ -43,54 +43,71 @@
#include <parallel/settings.h>
#include <parallel/basic_iterator.h>
+#include <parallel/base.h>
namespace __gnu_parallel
{
- /** @brief Embarrassingly parallel algorithm for random access
- * iterators, using an OpenMP for loop.
- *
- * @param begin Begin iterator of element sequence.
- * @param end End iterator of element sequence.
- * @param o User-supplied functor (comparator, predicate, adding
- * functor, etc.).
- * @param f Functor to "process" an element with op (depends on
- * desired functionality, e. g. for std::for_each(), ...).
- * @param r Functor to "add" a single result to the already
- * processed elements (depends on functionality).
- * @param base Base value for reduction.
- * @param output Pointer to position where final result is written to
- * @param bound Maximum number of elements processed (e. g. for
- * std::count_n()).
- * @return User-supplied functor (that may contain a part of the result).
- */
- template<typename RandomAccessIterator, typename Op, typename Fu, typename Red, typename Result>
+/** @brief Embarrassingly parallel algorithm for random access
+ * iterators, using an OpenMP for loop.
+ *
+ * @param begin Begin iterator of element sequence.
+ * @param end End iterator of element sequence.
+ * @param o User-supplied functor (comparator, predicate, adding
+ * functor, etc.).
+ * @param f Functor to "process" an element with op (depends on
+ * desired functionality, e. g. for std::for_each(), ...).
+ * @param r Functor to "add" a single result to the already
+ * processed elements (depends on functionality).
+ * @param base Base value for reduction.
+ * @param output Pointer to position where final result is written to
+ * @param bound Maximum number of elements processed (e. g. for
+ * std::count_n()).
+ * @return User-supplied functor (that may contain a part of the result).
+ */
+template<typename RandomAccessIterator,
+ typename Op,
+ typename Fu,
+ typename Red,
+ typename Result>
Op
- for_each_template_random_access_omp_loop(RandomAccessIterator begin, RandomAccessIterator end, Op o, Fu& f, Red r, Result base, Result& output, typename std::iterator_traits<RandomAccessIterator>::difference_type bound)
+ for_each_template_random_access_omp_loop(
+ RandomAccessIterator begin,
+ RandomAccessIterator end,
+ Op o, Fu& f, Red r, Result base, Result& output,
+ typename std::iterator_traits<RandomAccessIterator>::
+ difference_type bound)
{
- typedef typename std::iterator_traits<RandomAccessIterator>::difference_type difference_type;
+ typedef typename
+ std::iterator_traits<RandomAccessIterator>::difference_type
+ difference_type;
- thread_index_t num_threads = (get_max_threads() < (end - begin)) ? get_max_threads() : static_cast<thread_index_t>((end - begin));
- Result *thread_results = new Result[num_threads];
difference_type length = end - begin;
+ thread_index_t num_threads =
+ __gnu_parallel::min<difference_type>(get_max_threads(), length);
- for (thread_index_t i = 0; i < num_threads; i++)
+ Result *thread_results;
+
+# pragma omp parallel num_threads(num_threads)
{
- thread_results[i] = r(thread_results[i], f(o, begin+i));
- }
-
-#pragma omp parallel num_threads(num_threads)
- {
-#pragma omp for schedule(dynamic, Settings::workstealing_chunk_size)
- for (difference_type pos = 0; pos < length; pos++)
- {
- thread_results[omp_get_thread_num()] = r(thread_results[omp_get_thread_num()], f(o, begin+pos));
- }
- }
+# pragma omp single
+ {
+ num_threads = omp_get_num_threads();
+ thread_results = new Result[num_threads];
+
+ for (thread_index_t i = 0; i < num_threads; i++)
+ thread_results[i] = Result();
+ }
+
+ thread_index_t iam = omp_get_thread_num();
+
+# pragma omp for schedule(dynamic, Settings::workstealing_chunk_size)
+ for (difference_type pos = 0; pos < length; pos++)
+ thread_results[iam] =
+ r(thread_results[iam], f(o, begin+pos));
+ } //parallel
for (thread_index_t i = 0; i < num_threads; i++)
- {
- output = r(output, thread_results[i]);
- }
+ output = r(output, thread_results[i]);
delete [] thread_results;
@@ -100,6 +117,7 @@ namespace __gnu_parallel
return o;
}
+
} // end namespace
#endif
diff --git a/libstdc++-v3/include/parallel/omp_loop_static.h b/libstdc++-v3/include/parallel/omp_loop_static.h
index 22acb2d..df2c3b5 100644
--- a/libstdc++-v3/include/parallel/omp_loop_static.h
+++ b/libstdc++-v3/include/parallel/omp_loop_static.h
@@ -64,39 +64,50 @@ namespace __gnu_parallel
* std::count_n()).
* @return User-supplied functor (that may contain a part of the result).
*/
- template<typename RandomAccessIterator, typename Op, typename Fu, typename Red, typename Result>
+template<typename RandomAccessIterator,
+ typename Op,
+ typename Fu,
+ typename Red,
+ typename Result>
Op
- for_each_template_random_access_omp_loop_static(RandomAccessIterator begin,
- RandomAccessIterator end,
- Op o, Fu& f, Red r,
- Result base, Result& output,
- typename std::iterator_traits<RandomAccessIterator>::difference_type bound)
+ for_each_template_random_access_omp_loop_static(
+ RandomAccessIterator begin,
+ RandomAccessIterator end,
+ Op o, Fu& f, Red r, Result base, Result& output,
+ typename std::iterator_traits<RandomAccessIterator>::
+ difference_type bound)
{
- typedef std::iterator_traits<RandomAccessIterator> traits_type;
- typedef typename traits_type::difference_type difference_type;
+ typedef typename
+ std::iterator_traits<RandomAccessIterator>::difference_type
+ difference_type;
- thread_index_t num_threads = (get_max_threads() < (end - begin)) ? get_max_threads() : (end - begin);
- Result *thread_results = new Result[num_threads];
difference_type length = end - begin;
+ thread_index_t num_threads =
+ std::min<difference_type>(get_max_threads(), length);
- for (thread_index_t i = 0; i < num_threads; i++)
+ Result *thread_results;
+
+# pragma omp parallel num_threads(num_threads)
{
- thread_results[i] = r(thread_results[i], f(o, begin+i));
- }
-
-#pragma omp parallel num_threads(num_threads)
- {
-#pragma omp for schedule(static, Settings::workstealing_chunk_size)
- for (difference_type pos = 0; pos < length; pos++)
- {
- thread_results[omp_get_thread_num()] = r(thread_results[omp_get_thread_num()], f(o, begin+pos));
- }
- }
+# pragma omp single
+ {
+ num_threads = omp_get_num_threads();
+ thread_results = new Result[num_threads];
+
+ for (thread_index_t i = 0; i < num_threads; i++)
+ thread_results[i] = Result();
+ }
+
+ thread_index_t iam = omp_get_thread_num();
+
+# pragma omp for schedule(static, Settings::workstealing_chunk_size)
+ for (difference_type pos = 0; pos < length; pos++)
+ thread_results[iam] =
+ r(thread_results[iam], f(o, begin+pos));
+ } //parallel
for (thread_index_t i = 0; i < num_threads; i++)
- {
- output = r(output, thread_results[i]);
- }
+ output = r(output, thread_results[i]);
delete [] thread_results;
@@ -106,6 +117,7 @@ namespace __gnu_parallel
return o;
}
+
} // end namespace
#endif
diff --git a/libstdc++-v3/include/parallel/par_loop.h b/libstdc++-v3/include/parallel/par_loop.h
index 98604cf..6954e74 100644
--- a/libstdc++-v3/include/parallel/par_loop.h
+++ b/libstdc++-v3/include/parallel/par_loop.h
@@ -41,69 +41,80 @@
#include <omp.h>
#include <parallel/settings.h>
+#include <parallel/base.h>
namespace __gnu_parallel
{
- /** @brief Embarrassingly parallel algorithm for random access
- * iterators, using hand-crafted parallelization by equal splitting
- * the work.
- *
- * @param begin Begin iterator of element sequence.
- * @param end End iterator of element sequence.
- * @param o User-supplied functor (comparator, predicate, adding
- * functor, ...)
- * @param f Functor to "process" an element with op (depends on
- * desired functionality, e. g. for std::for_each(), ...).
- * @param r Functor to "add" a single result to the already
- * processed elements (depends on functionality).
- * @param base Base value for reduction.
- * @param output Pointer to position where final result is written to
- * @param bound Maximum number of elements processed (e. g. for
- * std::count_n()).
- * @return User-supplied functor (that may contain a part of the result).
- */
- template<typename RandomAccessIterator, typename Op, typename Fu, typename Red, typename Result>
+/** @brief Embarrassingly parallel algorithm for random access
+ * iterators, using hand-crafted parallelization by equal splitting
+ * the work.
+ *
+ * @param begin Begin iterator of element sequence.
+ * @param end End iterator of element sequence.
+ * @param o User-supplied functor (comparator, predicate, adding
+ * functor, ...)
+ * @param f Functor to "process" an element with op (depends on
+ * desired functionality, e. g. for std::for_each(), ...).
+ * @param r Functor to "add" a single result to the already
+ * processed elements (depends on functionality).
+ * @param base Base value for reduction.
+ * @param output Pointer to position where final result is written to
+ * @param bound Maximum number of elements processed (e. g. for
+ * std::count_n()).
+ * @return User-supplied functor (that may contain a part of the result).
+ */
+template<
+ typename RandomAccessIterator,
+ typename Op,
+ typename Fu,
+ typename Red,
+ typename Result>
Op
- for_each_template_random_access_ed(RandomAccessIterator begin,
- RandomAccessIterator end, Op o, Fu& f,
- Red r, Result base, Result& output,
- typename std::iterator_traits<RandomAccessIterator>::difference_type bound)
+ for_each_template_random_access_ed(
+ RandomAccessIterator begin,
+ RandomAccessIterator end,
+ Op o, Fu& f, Red r, Result base, Result& output,
+ typename std::iterator_traits<RandomAccessIterator>::
+ difference_type bound)
{
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::difference_type difference_type;
const difference_type length = end - begin;
- const difference_type settings_threads = static_cast<difference_type>(get_max_threads());
- const difference_type dmin = settings_threads < length ? settings_threads : length;
- const difference_type dmax = dmin > 1 ? dmin : 1;
+ Result *thread_results;
- thread_index_t num_threads = static_cast<thread_index_t>(dmax);
+ thread_index_t num_threads =
+ __gnu_parallel::min<difference_type>(get_max_threads(), length);
+# pragma omp parallel num_threads(num_threads)
+ {
+# pragma omp single
+ {
+ num_threads = omp_get_num_threads();
+ thread_results = new Result[num_threads];
+ }
- Result *thread_results = new Result[num_threads];
+ thread_index_t iam = omp_get_thread_num();
-#pragma omp parallel num_threads(num_threads)
- {
- // Neutral element.
- Result reduct = Result();
+ // Neutral element.
+ Result reduct = Result();
- thread_index_t p = num_threads;
- thread_index_t iam = omp_get_thread_num();
- difference_type start = iam * length / p;
- difference_type limit = (iam == p - 1) ? length : (iam + 1) * length / p;
+ difference_type
+ start = equally_split_point(length, num_threads, iam),
+ stop = equally_split_point(length, num_threads, iam + 1);
- if (start < limit)
- {
- reduct = f(o, begin + start);
- start++;
- }
+ if (start < stop)
+ {
+ reduct = f(o, begin + start);
+ ++start;
+ }
- for (; start < limit; start++)
- reduct = r(reduct, f(o, begin + start));
+ for (; start < stop; ++start)
+ reduct = r(reduct, f(o, begin + start));
- thread_results[iam] = reduct;
- }
+ thread_results[iam] = reduct;
+ } //parallel
for (thread_index_t i = 0; i < num_threads; i++)
output = r(output, thread_results[i]);
diff --git a/libstdc++-v3/include/parallel/partial_sum.h b/libstdc++-v3/include/parallel/partial_sum.h
index e7652c0..3dfce86 100644
--- a/libstdc++-v3/include/parallel/partial_sum.h
+++ b/libstdc++-v3/include/parallel/partial_sum.h
@@ -48,130 +48,156 @@ namespace __gnu_parallel
{
// Problem: there is no 0-element given.
- /** @brief Base case prefix sum routine.
- * @param begin Begin iterator of input sequence.
- * @param end End iterator of input sequence.
- * @param result Begin iterator of output sequence.
- * @param bin_op Associative binary function.
- * @param value Start value. Must be passed since the neutral
- * element is unknown in general.
- * @return End iterator of output sequence. */
- template<typename InputIterator, typename OutputIterator, typename BinaryOperation>
+/** @brief Base case prefix sum routine.
+ * @param begin Begin iterator of input sequence.
+ * @param end End iterator of input sequence.
+ * @param result Begin iterator of output sequence.
+ * @param bin_op Associative binary function.
+ * @param value Start value. Must be passed since the neutral
+ * element is unknown in general.
+ * @return End iterator of output sequence. */
+template<
+ typename InputIterator,
+ typename OutputIterator,
+ typename BinaryOperation>
inline OutputIterator
- parallel_partial_sum_basecase(InputIterator begin, InputIterator end,
- OutputIterator result, BinaryOperation bin_op,
- typename std::iterator_traits<InputIterator>::value_type value)
+ parallel_partial_sum_basecase(
+ InputIterator begin, InputIterator end,
+ OutputIterator result, BinaryOperation bin_op,
+ typename std::iterator_traits<InputIterator>::value_type value)
{
if (begin == end)
return result;
while (begin != end)
{
- value = bin_op(value, *begin);
- *result = value;
- result++;
- begin++;
+ value = bin_op(value, *begin);
+ *result = value;
+ result++;
+ begin++;
}
return result;
}
- /** @brief Parallel partial sum implementation, two-phase approach,
- no recursion.
- * @param begin Begin iterator of input sequence.
- * @param end End iterator of input sequence.
- * @param result Begin iterator of output sequence.
- * @param bin_op Associative binary function.
- * @param n Length of sequence.
- * @param num_threads Number of threads to use.
- * @return End iterator of output sequence.
- */
- template<typename InputIterator, typename OutputIterator, typename BinaryOperation>
+/** @brief Parallel partial sum implementation, two-phase approach,
+ no recursion.
+ * @param begin Begin iterator of input sequence.
+ * @param end End iterator of input sequence.
+ * @param result Begin iterator of output sequence.
+ * @param bin_op Associative binary function.
+ * @param n Length of sequence.
+ * @param num_threads Number of threads to use.
+ * @return End iterator of output sequence.
+ */
+template<
+ typename InputIterator,
+ typename OutputIterator,
+ typename BinaryOperation>
OutputIterator
- parallel_partial_sum_linear(InputIterator begin, InputIterator end,
- OutputIterator result, BinaryOperation bin_op,
- typename std::iterator_traits<InputIterator>::difference_type n, int num_threads)
+ parallel_partial_sum_linear(
+ InputIterator begin, InputIterator end,
+ OutputIterator result, BinaryOperation bin_op,
+ typename std::iterator_traits<InputIterator>::difference_type n)
{
typedef std::iterator_traits<InputIterator> traits_type;
typedef typename traits_type::value_type value_type;
typedef typename traits_type::difference_type difference_type;
- if (num_threads > (n - 1))
- num_threads = static_cast<thread_index_t>(n - 1);
+ thread_index_t num_threads =
+ std::min<difference_type>(get_max_threads(), n - 1);
+
if (num_threads < 2)
{
- *result = *begin;
- return parallel_partial_sum_basecase(begin + 1, end, result + 1, bin_op, *begin);
+ *result = *begin;
+ return parallel_partial_sum_basecase(
+ begin + 1, end, result + 1, bin_op, *begin);
}
- difference_type* borders = static_cast<difference_type*>(__builtin_alloca(sizeof(difference_type) * (num_threads + 2)));
+ difference_type* borders;
+ value_type* sums;
- if (Settings::partial_sum_dilatation == 1.0f)
- equally_split(n, num_threads + 1, borders);
- else
+# pragma omp parallel num_threads(num_threads)
{
- difference_type chunk_length = (int)((double)n / ((double)num_threads + Settings::partial_sum_dilatation)), borderstart = n - num_threads * chunk_length;
- borders[0] = 0;
- for (int i = 1; i < (num_threads + 1); i++)
- {
- borders[i] = borderstart;
- borderstart += chunk_length;
- }
- borders[num_threads + 1] = n;
- }
-
- value_type* sums = static_cast<value_type*>(::operator new(sizeof(value_type) * num_threads));
- OutputIterator target_end;
-
-#pragma omp parallel num_threads(num_threads)
- {
- int id = omp_get_thread_num();
- if (id == 0)
- {
- *result = *begin;
- parallel_partial_sum_basecase(begin + 1, begin + borders[1],
- result + 1, bin_op, *begin);
- sums[0] = *(result + borders[1] - 1);
- }
- else
- {
- sums[id] = std::accumulate(begin + borders[id] + 1,
- begin + borders[id + 1],
- *(begin + borders[id]),
- bin_op, __gnu_parallel::sequential_tag());
- }
-
-#pragma omp barrier
-
-#pragma omp single
- parallel_partial_sum_basecase(sums + 1, sums + num_threads, sums + 1,
- bin_op, sums[0]);
-
-#pragma omp barrier
-
- // Still same team.
- parallel_partial_sum_basecase(begin + borders[id + 1],
- begin + borders[id + 2],
- result + borders[id + 1], bin_op,
- sums[id]);
- }
-
- delete [] sums;
+# pragma omp single
+ {
+ num_threads = omp_get_num_threads();
+
+ borders = new difference_type[num_threads + 2];
+
+ if (Settings::partial_sum_dilatation == 1.0f)
+ equally_split(n, num_threads + 1, borders);
+ else
+ {
+ difference_type chunk_length =
+ ((double)n /
+ ((double)num_threads + Settings::partial_sum_dilatation)),
+ borderstart = n - num_threads * chunk_length;
+ borders[0] = 0;
+ for (int i = 1; i < (num_threads + 1); i++)
+ {
+ borders[i] = borderstart;
+ borderstart += chunk_length;
+ }
+ borders[num_threads + 1] = n;
+ }
+
+ sums = static_cast<value_type*>(
+ ::operator new(sizeof(value_type) * num_threads));
+ OutputIterator target_end;
+ } //single
+
+ int iam = omp_get_thread_num();
+ if (iam == 0)
+ {
+ *result = *begin;
+ parallel_partial_sum_basecase(begin + 1, begin + borders[1],
+ result + 1, bin_op, *begin);
+ sums[0] = *(result + borders[1] - 1);
+ }
+ else
+ {
+ sums[iam] = std::accumulate(begin + borders[iam] + 1,
+ begin + borders[iam + 1],
+ *(begin + borders[iam]),
+ bin_op, __gnu_parallel::sequential_tag());
+ }
+
+# pragma omp barrier
+
+# pragma omp single
+ parallel_partial_sum_basecase(
+ sums + 1, sums + num_threads, sums + 1, bin_op, sums[0]);
+
+# pragma omp barrier
+
+ // Still same team.
+ parallel_partial_sum_basecase(begin + borders[iam + 1],
+ begin + borders[iam + 2],
+ result + borders[iam + 1], bin_op,
+ sums[iam]);
+ } //parallel
+
+ delete[] sums;
+ delete[] borders;
return result + n;
}
- /** @brief Parallel partial sum front-end.
- * @param begin Begin iterator of input sequence.
- * @param end End iterator of input sequence.
- * @param result Begin iterator of output sequence.
- * @param bin_op Associative binary function.
- * @return End iterator of output sequence. */
- template<typename InputIterator, typename OutputIterator, typename BinaryOperation>
+/** @brief Parallel partial sum front-end.
+ * @param begin Begin iterator of input sequence.
+ * @param end End iterator of input sequence.
+ * @param result Begin iterator of output sequence.
+ * @param bin_op Associative binary function.
+ * @return End iterator of output sequence. */
+template<
+ typename InputIterator,
+ typename OutputIterator,
+ typename BinaryOperation>
OutputIterator
parallel_partial_sum(InputIterator begin, InputIterator end,
- OutputIterator result, BinaryOperation bin_op)
+ OutputIterator result, BinaryOperation bin_op)
{
- _GLIBCXX_CALL(begin - end);
+ _GLIBCXX_CALL(begin - end)
typedef std::iterator_traits<InputIterator> traits_type;
typedef typename traits_type::value_type value_type;
@@ -179,18 +205,15 @@ namespace __gnu_parallel
difference_type n = end - begin;
- int num_threads = get_max_threads();
-
switch (Settings::partial_sum_algorithm)
{
case Settings::LINEAR:
- // Need an initial offset.
- return parallel_partial_sum_linear(begin, end, result, bin_op,
- n, num_threads);
+ // Need an initial offset.
+ return parallel_partial_sum_linear(begin, end, result, bin_op, n);
default:
- // Partial_sum algorithm not implemented.
- _GLIBCXX_PARALLEL_ASSERT(0);
- return result + n;
+ // Partial_sum algorithm not implemented.
+ _GLIBCXX_PARALLEL_ASSERT(0);
+ return result + n;
}
}
}
diff --git a/libstdc++-v3/include/parallel/partition.h b/libstdc++-v3/include/parallel/partition.h
index 2b8631d..d6dac37 100644
--- a/libstdc++-v3/include/parallel/partition.h
+++ b/libstdc++-v3/include/parallel/partition.h
@@ -45,21 +45,21 @@
#include <bits/stl_algo.h>
#include <parallel/parallel.h>
-/** @brief Decide whether to declare certain variable volatile in this file. */
+/** @brief Decide whether to declare certain variables volatile. */
#define _GLIBCXX_VOLATILE volatile
namespace __gnu_parallel
{
- /** @brief Parallel implementation of std::partition.
- * @param begin Begin iterator of input sequence to split.
- * @param end End iterator of input sequence to split.
- * @param pred Partition predicate, possibly including some kind of pivot.
- * @param max_num_threads Maximum number of threads to use for this task.
- * @return Number of elements not fulfilling the predicate. */
- template<typename RandomAccessIterator, typename Predicate>
- inline typename std::iterator_traits<RandomAccessIterator>::difference_type
+/** @brief Parallel implementation of std::partition.
+ * @param begin Begin iterator of input sequence to split.
+ * @param end End iterator of input sequence to split.
+ * @param pred Partition predicate, possibly including some kind of pivot.
+ * @param num_threads Maximum number of threads to use for this task.
+ * @return Number of elements not fulfilling the predicate. */
+template<typename RandomAccessIterator, typename Predicate>
+ typename std::iterator_traits<RandomAccessIterator>::difference_type
parallel_partition(RandomAccessIterator begin, RandomAccessIterator end,
- Predicate pred, thread_index_t max_num_threads)
+ Predicate pred, thread_index_t num_threads)
{
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::value_type value_type;
@@ -74,212 +74,238 @@ namespace __gnu_parallel
_GLIBCXX_VOLATILE difference_type leftover_left, leftover_right;
_GLIBCXX_VOLATILE difference_type leftnew, rightnew;
- bool* reserved_left, * reserved_right;
-
- reserved_left = new bool[max_num_threads];
- reserved_right = new bool[max_num_threads];
+ bool* reserved_left = NULL, * reserved_right = NULL;
difference_type chunk_size;
- if (Settings::partition_chunk_share > 0.0)
- chunk_size = std::max((difference_type)Settings::partition_chunk_size, (difference_type)((double)n * Settings::partition_chunk_share / (double)max_num_threads));
- else
- chunk_size = Settings::partition_chunk_size;
omp_lock_t result_lock;
omp_init_lock(&result_lock);
- // At least good for two processors.
- while (right - left + 1 >= 2 * max_num_threads * chunk_size)
+ //at least two chunks per thread
+ if(right - left + 1 >= 2 * num_threads * chunk_size)
+# pragma omp parallel num_threads(num_threads)
{
- difference_type num_chunks = (right - left + 1) / chunk_size;
- thread_index_t num_threads = (int)std::min((difference_type)max_num_threads, num_chunks / 2);
-
- for (int r = 0; r < num_threads; r++)
- {
- reserved_left[r] = false;
- reserved_right[r] = false;
- }
- leftover_left = 0;
- leftover_right = 0;
-
-#pragma omp parallel num_threads(num_threads)
- {
- // Private.
- difference_type thread_left, thread_left_border, thread_right, thread_right_border;
- thread_left = left + 1;
-
- // Just to satisfy the condition below.
- thread_left_border = thread_left - 1;
- thread_right = n - 1;
- thread_right_border = thread_right + 1;
-
- bool iam_finished = false;
- while (!iam_finished)
- {
- if (thread_left > thread_left_border)
- {
- omp_set_lock(&result_lock);
- if (left + (chunk_size - 1) > right)
- iam_finished = true;
- else
- {
- thread_left = left;
- thread_left_border = left + (chunk_size - 1);
- left += chunk_size;
- }
- omp_unset_lock(&result_lock);
- }
-
- if (thread_right < thread_right_border)
- {
- omp_set_lock(&result_lock);
- if (left > right - (chunk_size - 1))
- iam_finished = true;
- else
- {
- thread_right = right;
- thread_right_border = right - (chunk_size - 1);
- right -= chunk_size;
- }
- omp_unset_lock(&result_lock);
- }
-
- if (iam_finished)
- break;
-
- // Swap as usual.
- while (thread_left < thread_right)
- {
- while (pred(begin[thread_left]) && thread_left <= thread_left_border)
- thread_left++;
- while (!pred(begin[thread_right]) && thread_right >= thread_right_border)
- thread_right--;
-
- if (thread_left > thread_left_border || thread_right < thread_right_border)
- // Fetch new chunk(s).
- break;
-
- std::swap(begin[thread_left], begin[thread_right]);
- thread_left++;
- thread_right--;
- }
- }
-
- // Now swap the leftover chunks to the right places.
- if (thread_left <= thread_left_border)
-#pragma omp atomic
- leftover_left++;
- if (thread_right >= thread_right_border)
-#pragma omp atomic
- leftover_right++;
-
-#pragma omp barrier
-
-#pragma omp single
- {
- leftnew = left - leftover_left * chunk_size;
- rightnew = right + leftover_right * chunk_size;
- }
-
-#pragma omp barrier
-
- // <=> thread_left_border + (chunk_size - 1) >= leftnew
- if (thread_left <= thread_left_border
- && thread_left_border >= leftnew)
- {
- // Chunk already in place, reserve spot.
- reserved_left[(left - (thread_left_border + 1)) / chunk_size] = true;
- }
-
- // <=> thread_right_border - (chunk_size - 1) <= rightnew
- if (thread_right >= thread_right_border
- && thread_right_border <= rightnew)
- {
- // Chunk already in place, reserve spot.
- reserved_right[((thread_right_border - 1) - right) / chunk_size] = true;
- }
-
-#pragma omp barrier
-
- if (thread_left <= thread_left_border && thread_left_border < leftnew)
- {
- // Find spot and swap.
- difference_type swapstart = -1;
- omp_set_lock(&result_lock);
- for (int r = 0; r < leftover_left; r++)
+# pragma omp single
+ {
+ num_threads = omp_get_num_threads();
+ reserved_left = new bool[num_threads];
+ reserved_right = new bool[num_threads];
+
+ if (Settings::partition_chunk_share > 0.0)
+ chunk_size = std::max<difference_type>(
+ Settings::partition_chunk_size,
+ (double)n * Settings::partition_chunk_share /
+ (double)num_threads);
+ else
+ chunk_size = Settings::partition_chunk_size;
+ }
+
+ while (right - left + 1 >= 2 * num_threads * chunk_size)
+ {
+# pragma omp single
+ {
+ difference_type num_chunks = (right - left + 1) / chunk_size;
+
+ for (int r = 0; r < num_threads; r++)
+ {
+ reserved_left[r] = false;
+ reserved_right[r] = false;
+ }
+ leftover_left = 0;
+ leftover_right = 0;
+ } //implicit barrier
+
+ // Private.
+ difference_type thread_left, thread_left_border,
+ thread_right, thread_right_border;
+ thread_left = left + 1;
+
+ // Just to satisfy the condition below.
+ thread_left_border = thread_left - 1;
+ thread_right = n - 1;
+ thread_right_border = thread_right + 1;
+
+ bool iam_finished = false;
+ while (!iam_finished)
+ {
+ if (thread_left > thread_left_border)
+ {
+ omp_set_lock(&result_lock);
+ if (left + (chunk_size - 1) > right)
+ iam_finished = true;
+ else
+ {
+ thread_left = left;
+ thread_left_border = left + (chunk_size - 1);
+ left += chunk_size;
+ }
+ omp_unset_lock(&result_lock);
+ }
+
+ if (thread_right < thread_right_border)
+ {
+ omp_set_lock(&result_lock);
+ if (left > right - (chunk_size - 1))
+ iam_finished = true;
+ else
+ {
+ thread_right = right;
+ thread_right_border = right - (chunk_size - 1);
+ right -= chunk_size;
+ }
+ omp_unset_lock(&result_lock);
+ }
+
+ if (iam_finished)
+ break;
+
+ // Swap as usual.
+ while (thread_left < thread_right)
+ {
+ while (pred(begin[thread_left])
+ && thread_left <= thread_left_border)
+ thread_left++;
+ while (!pred(begin[thread_right])
+ && thread_right >= thread_right_border)
+ thread_right--;
+
+ if (thread_left > thread_left_border
+ || thread_right < thread_right_border)
+ // Fetch new chunk(s).
+ break;
+
+ std::swap(begin[thread_left], begin[thread_right]);
+ thread_left++;
+ thread_right--;
+ }
+ }
+
+ // Now swap the leftover chunks to the right places.
+ if (thread_left <= thread_left_border)
+# pragma omp atomic
+ leftover_left++;
+ if (thread_right >= thread_right_border)
+# pragma omp atomic
+ leftover_right++;
+
+# pragma omp barrier
+
+# pragma omp single
+ {
+ leftnew = left - leftover_left * chunk_size;
+ rightnew = right + leftover_right * chunk_size;
+ }
+
+# pragma omp barrier
+
+ // <=> thread_left_border + (chunk_size - 1) >= leftnew
+ if (thread_left <= thread_left_border
+ && thread_left_border >= leftnew)
+ {
+ // Chunk already in place, reserve spot.
+ reserved_left[(left - (thread_left_border + 1)) / chunk_size]
+ = true;
+ }
+
+ // <=> thread_right_border - (chunk_size - 1) <= rightnew
+ if (thread_right >= thread_right_border
+ && thread_right_border <= rightnew)
+ {
+ // Chunk already in place, reserve spot.
+ reserved_right
+ [((thread_right_border - 1) - right) / chunk_size]
+ = true;
+ }
+
+# pragma omp barrier
+
+ if (thread_left <= thread_left_border
+ && thread_left_border < leftnew)
+ {
+ // Find spot and swap.
+ difference_type swapstart = -1;
+ omp_set_lock(&result_lock);
+ for (int r = 0; r < leftover_left; r++)
if (!reserved_left[r])
{
reserved_left[r] = true;
swapstart = left - (r + 1) * chunk_size;
break;
}
- omp_unset_lock(&result_lock);
+ omp_unset_lock(&result_lock);
#if _GLIBCXX_ASSERTIONS
- _GLIBCXX_PARALLEL_ASSERT(swapstart != -1);
+ _GLIBCXX_PARALLEL_ASSERT(swapstart != -1);
#endif
- std::swap_ranges(begin + thread_left_border - (chunk_size - 1), begin + thread_left_border + 1, begin + swapstart);
- }
-
- if (thread_right >= thread_right_border
- && thread_right_border > rightnew)
- {
- // Find spot and swap
- difference_type swapstart = -1;
- omp_set_lock(&result_lock);
- for (int r = 0; r < leftover_right; r++)
- if (!reserved_right[r])
- {
- reserved_right[r] = true;
- swapstart = right + r * chunk_size + 1;
- break;
- }
- omp_unset_lock(&result_lock);
+ std::swap_ranges(
+ begin + thread_left_border - (chunk_size - 1),
+ begin + thread_left_border + 1,
+ begin + swapstart);
+ }
+
+ if (thread_right >= thread_right_border
+ && thread_right_border > rightnew)
+ {
+ // Find spot and swap
+ difference_type swapstart = -1;
+ omp_set_lock(&result_lock);
+ for (int r = 0; r < leftover_right; r++)
+ if (!reserved_right[r])
+ {
+ reserved_right[r] = true;
+ swapstart = right + r * chunk_size + 1;
+ break;
+ }
+ omp_unset_lock(&result_lock);
#if _GLIBCXX_ASSERTIONS
- _GLIBCXX_PARALLEL_ASSERT(swapstart != -1);
+ _GLIBCXX_PARALLEL_ASSERT(swapstart != -1);
#endif
- std::swap_ranges(begin + thread_right_border, begin + thread_right_border + chunk_size, begin + swapstart);
- }
+ std::swap_ranges(begin + thread_right_border,
+ begin + thread_right_border + chunk_size,
+ begin + swapstart);
+ }
#if _GLIBCXX_ASSERTIONS
-#pragma omp barrier
+# pragma omp barrier
-#pragma omp single
- {
- for (int r = 0; r < leftover_left; r++)
- _GLIBCXX_PARALLEL_ASSERT(reserved_left[r]);
- for (int r = 0; r < leftover_right; r++)
- _GLIBCXX_PARALLEL_ASSERT(reserved_right[r]);
- }
+# pragma omp single
+ {
+ for (int r = 0; r < leftover_left; r++)
+ _GLIBCXX_PARALLEL_ASSERT(reserved_left[r]);
+ for (int r = 0; r < leftover_right; r++)
+ _GLIBCXX_PARALLEL_ASSERT(reserved_right[r]);
+ }
-#pragma omp barrier
+# pragma omp barrier
#endif
-#pragma omp barrier
- left = leftnew;
- right = rightnew;
- }
- } // end "recursion"
+# pragma omp barrier
+
+ left = leftnew;
+ right = rightnew;
+ }
+# pragma omp flush(left, right)
+ } // end "recursion" //parallel
difference_type final_left = left, final_right = right;
while (final_left < final_right)
{
- // Go right until key is geq than pivot.
- while (pred(begin[final_left]) && final_left < final_right)
- final_left++;
-
- // Go left until key is less than pivot.
- while (!pred(begin[final_right]) && final_left < final_right)
- final_right--;
-
- if (final_left == final_right)
- break;
- std::swap(begin[final_left], begin[final_right]);
- final_left++;
- final_right--;
+ // Go right until key is geq than pivot.
+ while (pred(begin[final_left]) && final_left < final_right)
+ final_left++;
+
+ // Go left until key is less than pivot.
+ while (!pred(begin[final_right]) && final_left < final_right)
+ final_right--;
+
+ if (final_left == final_right)
+ break;
+ std::swap(begin[final_left], begin[final_right]);
+ final_left++;
+ final_right--;
}
// All elements on the left side are < piv, all elements on the
@@ -298,14 +324,14 @@ namespace __gnu_parallel
return final_left + 1;
}
- /**
- * @brief Parallel implementation of std::nth_element().
- * @param begin Begin iterator of input sequence.
- * @param nth Iterator of element that must be in position afterwards.
- * @param end End iterator of input sequence.
- * @param comp Comparator.
- */
- template<typename RandomAccessIterator, typename Comparator>
+/**
+ * @brief Parallel implementation of std::nth_element().
+ * @param begin Begin iterator of input sequence.
+ * @param nth Iterator of element that must be in position afterwards.
+ * @param end End iterator of input sequence.
+ * @param comp Comparator.
+ */
+template<typename RandomAccessIterator, typename Comparator>
void
parallel_nth_element(RandomAccessIterator begin, RandomAccessIterator nth,
RandomAccessIterator end, Comparator comp)
@@ -324,65 +350,65 @@ namespace __gnu_parallel
// Break if input range to small.
while (static_cast<sequence_index_t>(end - begin) >= minimum_length)
{
- difference_type n = end - begin;
-
- RandomAccessIterator pivot_pos = begin + rng(n);
-
- // Swap pivot_pos value to end.
- if (pivot_pos != (end - 1))
- std::swap(*pivot_pos, *(end - 1));
- pivot_pos = end - 1;
-
- // XXX Comparator must have first_value_type, second_value_type, result_type
- // Comparator == __gnu_parallel::lexicographic<S, int, __gnu_parallel::less<S, S> >
- // pivot_pos == std::pair<S, int>*
- // XXX binder2nd only for RandomAccessIterators??
- __gnu_parallel::binder2nd<Comparator, value_type, value_type, bool> pred(comp, *pivot_pos);
-
- // Divide, leave pivot unchanged in last place.
- RandomAccessIterator split_pos1, split_pos2;
- split_pos1 = begin + parallel_partition(begin, end - 1, pred, get_max_threads());
-
- // Left side: < pivot_pos; right side: >= pivot_pos
-
- // Swap pivot back to middle.
- if (split_pos1 != pivot_pos)
- std::swap(*split_pos1, *pivot_pos);
- pivot_pos = split_pos1;
-
- // In case all elements are equal, split_pos1 == 0
- if ((split_pos1 + 1 - begin) < (n >> 7) || (end - split_pos1) < (n >> 7))
- {
- // Very unequal split, one part smaller than one 128th
- // elements not stricly larger than the pivot.
- __gnu_parallel::unary_negate<__gnu_parallel::binder1st<Comparator, value_type, value_type, bool>, value_type> pred(__gnu_parallel::binder1st<Comparator, value_type, value_type, bool>(comp, *pivot_pos));
-
- // Find other end of pivot-equal range.
- split_pos2 = __gnu_sequential::partition(split_pos1 + 1, end, pred);
- }
- else
- // Only skip the pivot.
- split_pos2 = split_pos1 + 1;
-
- // Compare iterators.
- if (split_pos2 <= nth)
- begin = split_pos2;
- else if (nth < split_pos1)
- end = split_pos1;
- else
- break;
+ difference_type n = end - begin;
+
+ RandomAccessIterator pivot_pos = begin + rng(n);
+
+ // Swap pivot_pos value to end.
+ if (pivot_pos != (end - 1))
+ std::swap(*pivot_pos, *(end - 1));
+ pivot_pos = end - 1;
+
+ // XXX Comparator must have first_value_type, second_value_type, result_type
+ // Comparator == __gnu_parallel::lexicographic<S, int, __gnu_parallel::less<S, S> >
+ // pivot_pos == std::pair<S, int>*
+ // XXX binder2nd only for RandomAccessIterators??
+ __gnu_parallel::binder2nd<Comparator, value_type, value_type, bool> pred(comp, *pivot_pos);
+
+ // Divide, leave pivot unchanged in last place.
+ RandomAccessIterator split_pos1, split_pos2;
+ split_pos1 = begin + parallel_partition(begin, end - 1, pred, get_max_threads());
+
+ // Left side: < pivot_pos; right side: >= pivot_pos
+
+ // Swap pivot back to middle.
+ if (split_pos1 != pivot_pos)
+ std::swap(*split_pos1, *pivot_pos);
+ pivot_pos = split_pos1;
+
+ // In case all elements are equal, split_pos1 == 0
+ if ((split_pos1 + 1 - begin) < (n >> 7) || (end - split_pos1) < (n >> 7))
+ {
+ // Very unequal split, one part smaller than one 128th
+ // elements not stricly larger than the pivot.
+ __gnu_parallel::unary_negate<__gnu_parallel::binder1st<Comparator, value_type, value_type, bool>, value_type> pred(__gnu_parallel::binder1st<Comparator, value_type, value_type, bool>(comp, *pivot_pos));
+
+ // Find other end of pivot-equal range.
+ split_pos2 = __gnu_sequential::partition(split_pos1 + 1, end, pred);
+ }
+ else
+ // Only skip the pivot.
+ split_pos2 = split_pos1 + 1;
+
+ // Compare iterators.
+ if (split_pos2 <= nth)
+ begin = split_pos2;
+ else if (nth < split_pos1)
+ end = split_pos1;
+ else
+ break;
}
// Only at most Settings::partition_minimal_n elements left.
__gnu_sequential::sort(begin, end, comp);
}
- /** @brief Parallel implementation of std::partial_sort().
- * @param begin Begin iterator of input sequence.
- * @param middle Sort until this position.
- * @param end End iterator of input sequence.
- * @param comp Comparator. */
- template<typename RandomAccessIterator, typename Comparator>
+/** @brief Parallel implementation of std::partial_sort().
+* @param begin Begin iterator of input sequence.
+* @param middle Sort until this position.
+* @param end End iterator of input sequence.
+* @param comp Comparator. */
+template<typename RandomAccessIterator, typename Comparator>
void
parallel_partial_sort(RandomAccessIterator begin, RandomAccessIterator middle, RandomAccessIterator end, Comparator comp)
{
@@ -390,7 +416,7 @@ namespace __gnu_parallel
std::sort(begin, middle, comp);
}
-} //namespace __gnu_parallel
+} //namespace __gnu_parallel
#undef _GLIBCXX_VOLATILE
diff --git a/libstdc++-v3/include/parallel/quicksort.h b/libstdc++-v3/include/parallel/quicksort.h
index a9ceab4..4eb3578 100644
--- a/libstdc++-v3/include/parallel/quicksort.h
+++ b/libstdc++-v3/include/parallel/quicksort.h
@@ -53,11 +53,17 @@ namespace __gnu_parallel
* this part.
*/
template<typename RandomAccessIterator, typename Comparator>
- inline typename std::iterator_traits<RandomAccessIterator>::difference_type
- parallel_sort_qs_divide(RandomAccessIterator begin, RandomAccessIterator end,
- Comparator comp,
- typename std::iterator_traits<RandomAccessIterator>::difference_type pivot_rank,
- typename std::iterator_traits<RandomAccessIterator>::difference_type num_samples, thread_index_t num_threads)
+ inline
+ typename std::iterator_traits<RandomAccessIterator>::difference_type
+ parallel_sort_qs_divide(
+ RandomAccessIterator begin,
+ RandomAccessIterator end,
+ Comparator comp,
+ typename std::iterator_traits<RandomAccessIterator>::difference_type
+ pivot_rank,
+ typename std::iterator_traits<RandomAccessIterator>::difference_type
+ num_samples,
+ thread_index_t num_threads)
{
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::value_type value_type;
@@ -65,20 +71,24 @@ namespace __gnu_parallel
difference_type n = end - begin;
num_samples = std::min(num_samples, n);
- value_type* samples = static_cast<value_type*>(__builtin_alloca(sizeof(value_type) * num_samples));
+
+ // Allocate uninitialized, to avoid default constructor.
+ value_type* samples = static_cast<value_type*>(
+ operator new(num_samples * sizeof(value_type)));
for (difference_type s = 0; s < num_samples; s++)
{
- const unsigned long long index = static_cast<unsigned long long>(s)
- * n / num_samples;
- samples[s] = begin[index];
+ const unsigned long long index = static_cast<unsigned long long>(s)
+ * n / num_samples;
+ new(samples + s) value_type(begin[index]);
}
__gnu_sequential::sort(samples, samples + num_samples, comp);
value_type& pivot = samples[pivot_rank * num_samples / n];
- __gnu_parallel::binder2nd<Comparator, value_type, value_type, bool> pred(comp, pivot);
+ __gnu_parallel::binder2nd<Comparator, value_type, value_type, bool>
+ pred(comp, pivot);
difference_type split = parallel_partition(begin, end, pred, num_threads);
return split;
@@ -93,7 +103,10 @@ namespace __gnu_parallel
*/
template<typename RandomAccessIterator, typename Comparator>
inline void
- parallel_sort_qs_conquer(RandomAccessIterator begin, RandomAccessIterator end, Comparator comp, int num_threads)
+ parallel_sort_qs_conquer(RandomAccessIterator begin,
+ RandomAccessIterator end,
+ Comparator comp,
+ thread_index_t num_threads)
{
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::value_type value_type;
@@ -101,8 +114,8 @@ namespace __gnu_parallel
if (num_threads <= 1)
{
- __gnu_sequential::sort(begin, end, comp);
- return;
+ __gnu_sequential::sort(begin, end, comp);
+ return;
}
difference_type n = end - begin, pivot_rank;
@@ -110,24 +123,27 @@ namespace __gnu_parallel
if (n <= 1)
return;
- thread_index_t num_processors_left;
+ thread_index_t num_threads_left;
if ((num_threads % 2) == 1)
- num_processors_left = num_threads / 2 + 1;
+ num_threads_left = num_threads / 2 + 1;
else
- num_processors_left = num_threads / 2;
+ num_threads_left = num_threads / 2;
- pivot_rank = n * num_processors_left / num_threads;
+ pivot_rank = n * num_threads_left / num_threads;
- difference_type split = parallel_sort_qs_divide(begin, end, comp, pivot_rank,
-Settings::sort_qs_num_samples_preset, num_threads);
+ difference_type split = parallel_sort_qs_divide(
+ begin, end, comp, pivot_rank,
+ Settings::sort_qs_num_samples_preset, num_threads);
#pragma omp parallel sections
{
#pragma omp section
- parallel_sort_qs_conquer(begin, begin + split, comp, num_processors_left);
+ parallel_sort_qs_conquer(begin, begin + split,
+ comp, num_threads_left);
#pragma omp section
- parallel_sort_qs_conquer(begin + split, end, comp, num_threads - num_processors_left);
+ parallel_sort_qs_conquer(begin + split, end,
+ comp, num_threads - num_threads_left);
}
}
@@ -143,9 +159,12 @@ Settings::sort_qs_num_samples_preset, num_threads);
*/
template<typename RandomAccessIterator, typename Comparator>
inline void
- parallel_sort_qs(RandomAccessIterator begin, RandomAccessIterator end,
- Comparator comp,
- typename std::iterator_traits<RandomAccessIterator>::difference_type n, int num_threads)
+ parallel_sort_qs(
+ RandomAccessIterator begin,
+ RandomAccessIterator end,
+ Comparator comp,
+ typename std::iterator_traits<RandomAccessIterator>::difference_type n,
+ int num_threads)
{
_GLIBCXX_CALL(n)
@@ -165,12 +184,9 @@ Settings::sort_qs_num_samples_preset, num_threads);
// Hard to avoid.
omp_set_num_threads(num_threads);
- bool old_nested = (omp_get_nested() != 0);
- omp_set_nested(true);
parallel_sort_qs_conquer(begin, begin + n, comp, num_threads);
- omp_set_nested(old_nested);
}
-} //namespace __gnu_parallel
+} //namespace __gnu_parallel
#endif
diff --git a/libstdc++-v3/include/parallel/random_shuffle.h b/libstdc++-v3/include/parallel/random_shuffle.h
index 933ab3e..d7a82fb 100644
--- a/libstdc++-v3/include/parallel/random_shuffle.h
+++ b/libstdc++-v3/include/parallel/random_shuffle.h
@@ -45,16 +45,16 @@
namespace __gnu_parallel
{
- /** @brief Type to hold the index of a bin.
- *
- * Since many variables of this type are allocated, it should be
- * chosen as small as possible.
- */
- typedef unsigned short bin_index;
-
- /** @brief Data known to every thread participating in
- __gnu_parallel::parallel_random_shuffle(). */
- template<typename RandomAccessIterator>
+/** @brief Type to hold the index of a bin.
+ *
+ * Since many variables of this type are allocated, it should be
+ * chosen as small as possible.
+ */
+typedef unsigned short bin_index;
+
+/** @brief Data known to every thread participating in
+ __gnu_parallel::parallel_random_shuffle(). */
+template<typename RandomAccessIterator>
struct DRandomShufflingGlobalData
{
typedef std::iterator_traits<RandomAccessIterator> traits_type;
@@ -90,18 +90,15 @@ namespace __gnu_parallel
: source(_source) { }
};
- /** @brief Local data for a thread participating in
- __gnu_parallel::parallel_random_shuffle().
- */
- template<typename RandomAccessIterator, typename RandomNumberGenerator>
+/** @brief Local data for a thread participating in
+ __gnu_parallel::parallel_random_shuffle().
+ */
+template<typename RandomAccessIterator, typename RandomNumberGenerator>
struct DRSSorterPU
{
/** @brief Number of threads participating in total. */
int num_threads;
- /** @brief Number of owning thread. */
- int iam;
-
/** @brief Begin index for bins taken care of by this thread. */
bin_index bins_begin;
@@ -115,29 +112,29 @@ namespace __gnu_parallel
DRandomShufflingGlobalData<RandomAccessIterator>* sd;
};
- /** @brief Generate a random number in @c [0,2^logp).
- * @param logp Logarithm (basis 2) of the upper range bound.
- * @param rng Random number generator to use.
- */
- template<typename RandomNumberGenerator>
+/** @brief Generate a random number in @c [0,2^logp).
+ * @param logp Logarithm (basis 2) of the upper range bound.
+ * @param rng Random number generator to use.
+ */
+template<typename RandomNumberGenerator>
inline int
random_number_pow2(int logp, RandomNumberGenerator& rng)
{ return rng.genrand_bits(logp); }
- /** @brief Random shuffle code executed by each thread.
- * @param pus Array of thread-local data records. */
- template<typename RandomAccessIterator, typename RandomNumberGenerator>
+/** @brief Random shuffle code executed by each thread.
+ * @param pus Array of thread-local data records. */
+template<typename RandomAccessIterator, typename RandomNumberGenerator>
inline void
- parallel_random_shuffle_drs_pu(DRSSorterPU<RandomAccessIterator,
- RandomNumberGenerator>* pus)
+ parallel_random_shuffle_drs_pu(DRSSorterPU<RandomAccessIterator,
+ RandomNumberGenerator>* pus)
{
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::value_type value_type;
typedef typename traits_type::difference_type difference_type;
- DRSSorterPU<RandomAccessIterator, RandomNumberGenerator>* d = &pus[omp_get_thread_num()];
+ thread_index_t iam = omp_get_thread_num();
+ DRSSorterPU<RandomAccessIterator, RandomNumberGenerator>* d = &pus[iam];
DRandomShufflingGlobalData<RandomAccessIterator>* sd = d->sd;
- thread_index_t iam = d->iam;
// Indexing: dist[bin][processor]
difference_type length = sd->starts[iam + 1] - sd->starts[iam];
@@ -156,35 +153,35 @@ namespace __gnu_parallel
// First main loop.
for (difference_type i = 0; i < length; i++)
{
- bin_index oracle = random_number_pow2(num_bits, rng);
- oracles[i] = oracle;
+ bin_index oracle = random_number_pow2(num_bits, rng);
+ oracles[i] = oracle;
- // To allow prefix (partial) sum.
- dist[oracle + 1]++;
+ // To allow prefix (partial) sum.
+ dist[oracle + 1]++;
}
for (bin_index b = 0; b < sd->num_bins + 1; b++)
sd->dist[b][iam + 1] = dist[b];
-#pragma omp barrier
+# pragma omp barrier
-#pragma omp single
+# pragma omp single
{
// Sum up bins, sd->dist[s + 1][d->num_threads] now contains the
// total number of items in bin s
for (bin_index s = 0; s < sd->num_bins; s++)
- __gnu_sequential::partial_sum(sd->dist[s + 1],
- sd->dist[s + 1] + d->num_threads + 1,
- sd->dist[s + 1]);
+ __gnu_sequential::partial_sum(sd->dist[s + 1],
+ sd->dist[s + 1] + d->num_threads + 1,
+ sd->dist[s + 1]);
}
-#pragma omp barrier
+# pragma omp barrier
sequence_index_t offset = 0, global_offset = 0;
for (bin_index s = 0; s < d->bins_begin; s++)
global_offset += sd->dist[s + 1][d->num_threads];
-#pragma omp barrier
+# pragma omp barrier
for (bin_index s = d->bins_begin; s < d->bins_end; s++)
{
@@ -193,9 +190,10 @@ namespace __gnu_parallel
offset = sd->dist[s + 1][d->num_threads];
}
- sd->temporaries[iam] = static_cast<value_type*>(::operator new(sizeof(value_type) * offset));
+ sd->temporaries[iam] = static_cast<value_type*>(
+ ::operator new(sizeof(value_type) * offset));
-#pragma omp barrier
+# pragma omp barrier
// Draw local copies to avoid false sharing.
for (bin_index b = 0; b < sd->num_bins + 1; b++)
@@ -211,11 +209,11 @@ namespace __gnu_parallel
// Distribute according to oracles, second main loop.
for (difference_type i = 0; i < length; i++)
{
- bin_index target_bin = oracles[i];
- thread_index_t target_p = bin_proc[target_bin];
+ bin_index target_bin = oracles[i];
+ thread_index_t target_p = bin_proc[target_bin];
- // Last column [d->num_threads] stays unchanged.
- temporaries[target_p][dist[target_bin + 1]++] = *(source + i + start);
+ // Last column [d->num_threads] stays unchanged.
+ temporaries[target_p][dist[target_bin + 1]++] = *(source + i + start);
}
delete[] oracles;
@@ -223,23 +221,27 @@ namespace __gnu_parallel
delete[] bin_proc;
delete[] temporaries;
-#pragma omp barrier
+# pragma omp barrier
// Shuffle bins internally.
for (bin_index b = d->bins_begin; b < d->bins_end; b++)
{
- value_type* begin = sd->temporaries[iam] + ((b == d->bins_begin) ? 0 : sd->dist[b][d->num_threads]),
- * end = sd->temporaries[iam] + sd->dist[b + 1][d->num_threads];
- sequential_random_shuffle(begin, end, rng);
- std::copy(begin, end, sd->source + global_offset + ((b == d->bins_begin) ? 0 : sd->dist[b][d->num_threads]));
+ value_type* begin =
+ sd->temporaries[iam] +
+ ((b == d->bins_begin) ? 0 : sd->dist[b][d->num_threads]),
+ * end =
+ sd->temporaries[iam] + sd->dist[b + 1][d->num_threads];
+ sequential_random_shuffle(begin, end, rng);
+ std::copy(begin, end, sd->source + global_offset +
+ ((b == d->bins_begin) ? 0 : sd->dist[b][d->num_threads]));
}
delete[] sd->temporaries[iam];
}
- /** @brief Round up to the next greater power of 2.
- * @param x Integer to round up */
- template<typename T>
+/** @brief Round up to the next greater power of 2.
+ * @param x Integer to round up */
+template<typename T>
T
round_up_to_pow2(T x)
{
@@ -249,16 +251,21 @@ namespace __gnu_parallel
return (T)1 << (log2(x - 1) + 1);
}
- /** @brief Main parallel random shuffle step.
- * @param begin Begin iterator of sequence.
- * @param end End iterator of sequence.
- * @param n Length of sequence.
- * @param num_threads Number of threads to use.
- * @param rng Random number generator to use.
- */
- template<typename RandomAccessIterator, typename RandomNumberGenerator>
+/** @brief Main parallel random shuffle step.
+ * @param begin Begin iterator of sequence.
+ * @param end End iterator of sequence.
+ * @param n Length of sequence.
+ * @param num_threads Number of threads to use.
+ * @param rng Random number generator to use.
+ */
+template<typename RandomAccessIterator, typename RandomNumberGenerator>
inline void
- parallel_random_shuffle_drs(RandomAccessIterator begin, RandomAccessIterator end, typename std::iterator_traits<RandomAccessIterator>::difference_type n, int num_threads, RandomNumberGenerator& rng)
+ parallel_random_shuffle_drs(
+ RandomAccessIterator begin,
+ RandomAccessIterator end,
+ typename std::iterator_traits<RandomAccessIterator>::difference_type n,
+ thread_index_t num_threads,
+ RandomNumberGenerator& rng)
{
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::value_type value_type;
@@ -275,87 +282,99 @@ namespace __gnu_parallel
// Try the L1 cache first.
// Must fit into L1.
- num_bins_cache = std::max((difference_type)1, (difference_type)(n / (Settings::L1_cache_size_lb / sizeof(value_type))));
+ num_bins_cache = std::max<difference_type>(
+ 1, n / (Settings::L1_cache_size_lb / sizeof(value_type)));
num_bins_cache = round_up_to_pow2(num_bins_cache);
// No more buckets than TLB entries, power of 2
// Power of 2 and at least one element per bin, at most the TLB size.
- num_bins = std::min(n, (difference_type)num_bins_cache);
+ num_bins = std::min<difference_type>(n, num_bins_cache);
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
// 2 TLB entries needed per bin.
- num_bins = std::min((difference_type)Settings::TLB_size / 2, num_bins);
+ num_bins = std::min<difference_type>(Settings::TLB_size / 2, num_bins);
#endif
num_bins = round_up_to_pow2(num_bins);
if (num_bins < num_bins_cache)
{
#endif
- // Now try the L2 cache
- // Must fit into L2
- num_bins_cache = static_cast<bin_index>(std::max((difference_type)1, (difference_type)(n / (Settings::L2_cache_size / sizeof(value_type)))));
- num_bins_cache = round_up_to_pow2(num_bins_cache);
-
- // No more buckets than TLB entries, power of 2.
- num_bins = static_cast<bin_index>(std::min(n, (difference_type)num_bins_cache));
- // Power of 2 and at least one element per bin, at most the TLB size.
+ // Now try the L2 cache
+ // Must fit into L2
+ num_bins_cache = static_cast<bin_index>(std::max<difference_type>(
+ 1, n / (Settings::L2_cache_size / sizeof(value_type))));
+ num_bins_cache = round_up_to_pow2(num_bins_cache);
+
+ // No more buckets than TLB entries, power of 2.
+ num_bins = static_cast<bin_index>(
+ std::min(n, static_cast<difference_type>(num_bins_cache)));
+ // Power of 2 and at least one element per bin, at most the TLB size.
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
- // 2 TLB entries needed per bin.
- num_bins = std::min((difference_type)Settings::TLB_size / 2, num_bins);
+ // 2 TLB entries needed per bin.
+ num_bins = std::min(
+ static_cast<difference_type>(Settings::TLB_size / 2), num_bins);
#endif
- num_bins = round_up_to_pow2(num_bins);
+ num_bins = round_up_to_pow2(num_bins);
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
}
#endif
- num_threads = std::min((bin_index)num_threads, (bin_index)num_bins);
+ num_threads = std::min<bin_index>(num_threads, num_bins);
if (num_threads <= 1)
return sequential_random_shuffle(begin, end, rng);
DRandomShufflingGlobalData<RandomAccessIterator> sd(begin);
+ DRSSorterPU<RandomAccessIterator, random_number >* pus;
+ difference_type* starts;
- DRSSorterPU<RandomAccessIterator, random_number >* pus = new DRSSorterPU<RandomAccessIterator, random_number >[num_threads];
-
- sd.temporaries = new value_type*[num_threads];
- //sd.oracles = new bin_index[n];
- sd.dist = new difference_type*[num_bins + 1];
- sd.bin_proc = new thread_index_t[num_bins];
- for (bin_index b = 0; b < num_bins + 1; b++)
- sd.dist[b] = new difference_type[num_threads + 1];
- for (bin_index b = 0; b < (num_bins + 1); b++)
+# pragma omp parallel num_threads(num_threads)
{
- sd.dist[0][0] = 0;
- sd.dist[b][0] = 0;
- }
- difference_type* starts = sd.starts = new difference_type[num_threads + 1];
- int bin_cursor = 0;
- sd.num_bins = num_bins;
- sd.num_bits = log2(num_bins);
-
- difference_type chunk_length = n / num_threads, split = n % num_threads, start = 0;
- int bin_chunk_length = num_bins / num_threads, bin_split = num_bins % num_threads;
- for (int i = 0; i < num_threads; i++)
- {
- starts[i] = start;
- start += (i < split) ? (chunk_length + 1) : chunk_length;
- int j = pus[i].bins_begin = bin_cursor;
-
- // Range of bins for this processor.
- bin_cursor += (i < bin_split) ? (bin_chunk_length + 1) : bin_chunk_length;
- pus[i].bins_end = bin_cursor;
- for (; j < bin_cursor; j++)
- sd.bin_proc[j] = i;
- pus[i].num_threads = num_threads;
- pus[i].iam = i;
- pus[i].seed = rng(std::numeric_limits<uint32>::max());
- pus[i].sd = &sd;
- }
- starts[num_threads] = start;
-
- // Now shuffle in parallel.
-#pragma omp parallel num_threads(num_threads)
- parallel_random_shuffle_drs_pu(pus);
+# pragma omp single
+ {
+ pus = new DRSSorterPU<RandomAccessIterator, random_number>
+ [num_threads];
+
+ sd.temporaries = new value_type*[num_threads];
+ sd.dist = new difference_type*[num_bins + 1];
+ sd.bin_proc = new thread_index_t[num_bins];
+ for (bin_index b = 0; b < num_bins + 1; b++)
+ sd.dist[b] = new difference_type[num_threads + 1];
+ for (bin_index b = 0; b < (num_bins + 1); b++)
+ {
+ sd.dist[0][0] = 0;
+ sd.dist[b][0] = 0;
+ }
+ starts = sd.starts = new difference_type[num_threads + 1];
+ int bin_cursor = 0;
+ sd.num_bins = num_bins;
+ sd.num_bits = log2(num_bins);
+
+ difference_type chunk_length = n / num_threads,
+ split = n % num_threads, start = 0;
+ difference_type bin_chunk_length = num_bins / num_threads,
+ bin_split = num_bins % num_threads;
+ for (thread_index_t i = 0; i < num_threads; i++)
+ {
+ starts[i] = start;
+ start += (i < split) ? (chunk_length + 1) : chunk_length;
+ int j = pus[i].bins_begin = bin_cursor;
+
+ // Range of bins for this processor.
+ bin_cursor += (i < bin_split) ?
+ (bin_chunk_length + 1) : bin_chunk_length;
+ pus[i].bins_end = bin_cursor;
+ for (; j < bin_cursor; j++)
+ sd.bin_proc[j] = i;
+ pus[i].num_threads = num_threads;
+ pus[i].seed = rng(std::numeric_limits<uint32>::max());
+ pus[i].sd = &sd;
+ }
+ starts[num_threads] = start;
+ } //single
+ // Now shuffle in parallel.
+ parallel_random_shuffle_drs_pu(pus);
+ }
delete[] starts;
delete[] sd.bin_proc;
@@ -367,16 +386,16 @@ namespace __gnu_parallel
delete[] pus;
}
- /** @brief Sequential cache-efficient random shuffle.
- * @param begin Begin iterator of sequence.
- * @param end End iterator of sequence.
- * @param rng Random number generator to use.
- */
- template<typename RandomAccessIterator, typename RandomNumberGenerator>
+/** @brief Sequential cache-efficient random shuffle.
+ * @param begin Begin iterator of sequence.
+ * @param end End iterator of sequence.
+ * @param rng Random number generator to use.
+ */
+template<typename RandomAccessIterator, typename RandomNumberGenerator>
inline void
sequential_random_shuffle(RandomAccessIterator begin,
- RandomAccessIterator end,
- RandomNumberGenerator& rng)
+ RandomAccessIterator end,
+ RandomNumberGenerator& rng)
{
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::value_type value_type;
@@ -388,7 +407,9 @@ namespace __gnu_parallel
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
// Try the L1 cache first, must fit into L1.
- num_bins_cache = std::max((difference_type)1, (difference_type)(n / (Settings::L1_cache_size_lb / sizeof(value_type))));
+ num_bins_cache =
+ std::max<difference_type>
+ (1, n / (Settings::L1_cache_size_lb / sizeof(value_type)));
num_bins_cache = round_up_to_pow2(num_bins_cache);
// No more buckets than TLB entries, power of 2
@@ -403,19 +424,23 @@ namespace __gnu_parallel
if (num_bins < num_bins_cache)
{
#endif
- // Now try the L2 cache, must fit into L2.
- num_bins_cache = static_cast<bin_index>(std::max((difference_type)1, (difference_type)(n / (Settings::L2_cache_size / sizeof(value_type)))));
- num_bins_cache = round_up_to_pow2(num_bins_cache);
+ // Now try the L2 cache, must fit into L2.
+ num_bins_cache =
+ static_cast<bin_index>(std::max<difference_type>(
+ 1, n / (Settings::L2_cache_size / sizeof(value_type))));
+ num_bins_cache = round_up_to_pow2(num_bins_cache);
- // No more buckets than TLB entries, power of 2
- // Power of 2 and at least one element per bin, at most the TLB size.
- num_bins = static_cast<bin_index>(std::min(n, (difference_type)num_bins_cache));
+ // No more buckets than TLB entries, power of 2
+ // Power of 2 and at least one element per bin, at most the TLB size.
+ num_bins = static_cast<bin_index>
+ (std::min(n, static_cast<difference_type>(num_bins_cache)));
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
- // 2 TLB entries needed per bin
- num_bins = std::min((difference_type)Settings::TLB_size / 2, num_bins);
+ // 2 TLB entries needed per bin
+ num_bins =
+ std::min<difference_type>(Settings::TLB_size / 2, num_bins);
#endif
- num_bins = round_up_to_pow2(num_bins);
+ num_bins = round_up_to_pow2(num_bins);
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
}
#endif
@@ -424,58 +449,62 @@ namespace __gnu_parallel
if (num_bins > 1)
{
- value_type* target = static_cast<value_type*>(::operator new(sizeof(value_type) * n));
- bin_index* oracles = new bin_index[n];
- difference_type* dist0 = new difference_type[num_bins + 1], * dist1 = new difference_type[num_bins + 1];
-
- for (int b = 0; b < num_bins + 1; b++)
- dist0[b] = 0;
-
- random_number bitrng(rng(0xFFFFFFFF));
-
- for (difference_type i = 0; i < n; i++)
- {
- bin_index oracle = random_number_pow2(num_bits, bitrng);
- oracles[i] = oracle;
-
- // To allow prefix (partial) sum.
- dist0[oracle + 1]++;
- }
-
- // Sum up bins.
- __gnu_sequential::partial_sum(dist0, dist0 + num_bins + 1, dist0);
-
- for (int b = 0; b < num_bins + 1; b++)
- dist1[b] = dist0[b];
-
- // Distribute according to oracles.
- for (difference_type i = 0; i < n; i++)
- target[(dist0[oracles[i]])++] = *(begin + i);
-
- for (int b = 0; b < num_bins; b++)
- {
- sequential_random_shuffle(target + dist1[b], target + dist1[b + 1],
- rng);
- }
-
- delete[] dist0;
- delete[] dist1;
- delete[] oracles;
- delete[] target;
+ value_type* target = static_cast<value_type*>(
+ ::operator new(sizeof(value_type) * n));
+ bin_index* oracles = new bin_index[n];
+ difference_type* dist0 = new difference_type[num_bins + 1],
+ * dist1 = new difference_type[num_bins + 1];
+
+ for (int b = 0; b < num_bins + 1; b++)
+ dist0[b] = 0;
+
+ random_number bitrng(rng(0xFFFFFFFF));
+
+ for (difference_type i = 0; i < n; i++)
+ {
+ bin_index oracle = random_number_pow2(num_bits, bitrng);
+ oracles[i] = oracle;
+
+ // To allow prefix (partial) sum.
+ dist0[oracle + 1]++;
+ }
+
+ // Sum up bins.
+ __gnu_sequential::partial_sum(dist0, dist0 + num_bins + 1, dist0);
+
+ for (int b = 0; b < num_bins + 1; b++)
+ dist1[b] = dist0[b];
+
+ // Distribute according to oracles.
+ for (difference_type i = 0; i < n; i++)
+ target[(dist0[oracles[i]])++] = *(begin + i);
+
+ for (int b = 0; b < num_bins; b++)
+ {
+ sequential_random_shuffle(target + dist1[b],
+ target + dist1[b + 1],
+ rng);
+ }
+
+ delete[] dist0;
+ delete[] dist1;
+ delete[] oracles;
+ delete[] target;
}
else
__gnu_sequential::random_shuffle(begin, end, rng);
}
- /** @brief Parallel random public call.
- * @param begin Begin iterator of sequence.
- * @param end End iterator of sequence.
- * @param rng Random number generator to use.
- */
- template<typename RandomAccessIterator, typename RandomNumberGenerator>
+/** @brief Parallel random public call.
+ * @param begin Begin iterator of sequence.
+ * @param end End iterator of sequence.
+ * @param rng Random number generator to use.
+ */
+template<typename RandomAccessIterator, typename RandomNumberGenerator>
inline void
- parallel_random_shuffle(RandomAccessIterator begin, RandomAccessIterator end,
- RandomNumberGenerator rng = random_number())
+ parallel_random_shuffle(RandomAccessIterator begin,
+ RandomAccessIterator end,
+ RandomNumberGenerator rng = random_number())
{
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::difference_type difference_type;
diff --git a/libstdc++-v3/include/parallel/search.h b/libstdc++-v3/include/parallel/search.h
index ba00a8f..d917b12 100644
--- a/libstdc++-v3/include/parallel/search.h
+++ b/libstdc++-v3/include/parallel/search.h
@@ -53,10 +53,10 @@ namespace __gnu_parallel
* @param length Length of sequence to search for.
* @param advances Returned offsets.
*/
- template<typename RandomAccessIterator, typename _DifferenceTp>
+template<typename RandomAccessIterator, typename _DifferenceTp>
void
calc_borders(RandomAccessIterator elements, _DifferenceTp length,
- _DifferenceTp* off)
+ _DifferenceTp* off)
{
typedef _DifferenceTp difference_type;
@@ -66,9 +66,9 @@ namespace __gnu_parallel
difference_type k = 0;
for (difference_type j = 2; j <= length; j++)
{
- while ((k >= 0) && !(elements[k] == elements[j-1]))
- k = off[k];
- off[j] = ++k;
+ while ((k >= 0) && !(elements[k] == elements[j-1]))
+ k = off[k];
+ off[j] = ++k;
}
}
@@ -81,11 +81,14 @@ namespace __gnu_parallel
* @param end2 End iterator of second sequence.
* @param pred Find predicate.
* @return Place of finding in first sequences. */
- template<typename _RandomAccessIterator1, typename _RandomAccessIterator2, typename Pred>
+template<
+ typename _RandomAccessIterator1,
+ typename _RandomAccessIterator2,
+ typename Pred>
_RandomAccessIterator1
search_template(_RandomAccessIterator1 begin1, _RandomAccessIterator1 end1,
- _RandomAccessIterator2 begin2, _RandomAccessIterator2 end2,
- Pred pred)
+ _RandomAccessIterator2 begin2, _RandomAccessIterator2 end2,
+ Pred pred)
{
typedef std::iterator_traits<_RandomAccessIterator1> traits_type;
typedef typename traits_type::difference_type difference_type;
@@ -103,60 +106,71 @@ namespace __gnu_parallel
// Where is first occurrence of pattern? defaults to end.
difference_type result = (end1 - begin1);
+ difference_type *splitters;
// Pattern too long.
if (input_length < 0)
return end1;
- thread_index_t num_threads = std::max<difference_type>(1, std::min<difference_type>(input_length, __gnu_parallel::get_max_threads()));
-
omp_lock_t result_lock;
omp_init_lock(&result_lock);
- difference_type borders[num_threads + 1];
- __gnu_parallel::equally_split(input_length, num_threads, borders);
+ thread_index_t num_threads =
+ std::max<difference_type>(1,
+ std::min<difference_type>(input_length, get_max_threads()));
difference_type advances[pattern_length];
calc_borders(begin2, pattern_length, advances);
-#pragma omp parallel num_threads(num_threads)
- {
- thread_index_t iam = omp_get_thread_num();
-
- difference_type start = borders[iam], stop = borders[iam + 1];
-
- difference_type pos_in_pattern = 0;
- bool found_pattern = false;
-
- while (start <= stop && !found_pattern)
- {
- // Get new value of result.
-#pragma omp flush(result)
- // No chance for this thread to find first occurrence.
- if (result < start)
- break;
- while (pred(begin1[start + pos_in_pattern], begin2[pos_in_pattern]))
- {
- ++pos_in_pattern;
- if (pos_in_pattern == pattern_length)
- {
- // Found new candidate for result.
- omp_set_lock(&result_lock);
- result = std::min(result, start);
- omp_unset_lock(&result_lock);
-
- found_pattern = true;
- break;
- }
- }
- // Make safe jump.
- start += (pos_in_pattern - advances[pos_in_pattern]);
- pos_in_pattern = (advances[pos_in_pattern] < 0) ? 0 : advances[pos_in_pattern];
- }
- }
+# pragma omp parallel num_threads(num_threads)
+ {
+# pragma omp single
+ {
+ num_threads = omp_get_num_threads();
+ splitters = new difference_type[num_threads + 1];
+ equally_split(input_length, num_threads, splitters);
+ }
+
+ thread_index_t iam = omp_get_thread_num();
+
+ difference_type start = splitters[iam], stop = splitters[iam + 1];
+
+ difference_type pos_in_pattern = 0;
+ bool found_pattern = false;
+
+ while (start <= stop && !found_pattern)
+ {
+ // Get new value of result.
+ #pragma omp flush(result)
+ // No chance for this thread to find first occurrence.
+ if (result < start)
+ break;
+ while (pred(begin1[start + pos_in_pattern],
+ begin2[pos_in_pattern]))
+ {
+ ++pos_in_pattern;
+ if (pos_in_pattern == pattern_length)
+ {
+ // Found new candidate for result.
+ omp_set_lock(&result_lock);
+ result = std::min(result, start);
+ omp_unset_lock(&result_lock);
+
+ found_pattern = true;
+ break;
+ }
+ }
+ // Make safe jump.
+ start += (pos_in_pattern - advances[pos_in_pattern]);
+ pos_in_pattern =
+ (advances[pos_in_pattern] < 0) ? 0 : advances[pos_in_pattern];
+ }
+ } //parallel
omp_destroy_lock(&result_lock);
+ delete[] splitters;
+
// Return iterator on found element.
return (begin1 + result);
}
diff --git a/libstdc++-v3/include/parallel/set_operations.h b/libstdc++-v3/include/parallel/set_operations.h
index 2a16691..7be3127 100644
--- a/libstdc++-v3/include/parallel/set_operations.h
+++ b/libstdc++-v3/include/parallel/set_operations.h
@@ -47,28 +47,31 @@
namespace __gnu_parallel
{
- template<typename InputIterator, typename OutputIterator>
+template<typename InputIterator, typename OutputIterator>
inline OutputIterator
copy_tail(std::pair<InputIterator, InputIterator> b,
- std::pair<InputIterator, InputIterator> e, OutputIterator r)
+ std::pair<InputIterator, InputIterator> e, OutputIterator r)
{
if (b.first != e.first)
{
- do
- {
- *r++ = *b.first++;
- }
- while (b.first != e.first);
+ do
+ {
+ *r++ = *b.first++;
+ }
+ while (b.first != e.first);
}
else
{
- while (b.second != e.second)
- *r++ = *b.second++;
+ while (b.second != e.second)
+ *r++ = *b.second++;
}
return r;
}
- template<typename InputIterator, typename OutputIterator, typename Comparator>
+template<
+ typename InputIterator,
+ typename OutputIterator,
+ typename Comparator>
struct symmetric_difference_func
{
typedef std::iterator_traits<InputIterator> traits_type;
@@ -80,55 +83,56 @@ namespace __gnu_parallel
Comparator comp;
inline OutputIterator invoke(InputIterator a, InputIterator b,
- InputIterator c, InputIterator d,
- OutputIterator r) const
+ InputIterator c, InputIterator d,
+ OutputIterator r) const
{
while (a != b && c != d)
- {
- if (comp(*a, *c))
- {
- *r = *a;
- ++a;
- ++r;
- }
- else if (comp(*c, *a))
- {
- *r = *c;
- ++c;
- ++r;
- }
- else
- {
- ++a;
- ++c;
- }
- }
+ {
+ if (comp(*a, *c))
+ {
+ *r = *a;
+ ++a;
+ ++r;
+ }
+ else if (comp(*c, *a))
+ {
+ *r = *c;
+ ++c;
+ ++r;
+ }
+ else
+ {
+ ++a;
+ ++c;
+ }
+ }
return std::copy(c, d, std::copy(a, b, r));
}
inline difference_type
- count(InputIterator a, InputIterator b, InputIterator c, InputIterator d) const
+ count(InputIterator a, InputIterator b, InputIterator c, InputIterator d)
+ const
{
difference_type counter = 0;
while (a != b && c != d)
- {
- if (comp(*a, *c))
- {
- ++a;
- ++counter;
- }
- else if (comp(*c, *a))
- {
- ++c;
- ++counter;
- }
- else
- {
- ++a;
- ++c;
- }
- }
+ {
+ if (comp(*a, *c))
+ {
+ ++a;
+ ++counter;
+ }
+ else if (comp(*c, *a))
+ {
+ ++c;
+ ++counter;
+ }
+ else
+ {
+ ++a;
+ ++c;
+ }
+ }
return counter + (b - a) + (d - c);
}
@@ -144,7 +148,10 @@ namespace __gnu_parallel
};
- template<typename InputIterator, typename OutputIterator, typename Comparator>
+template<
+ typename InputIterator,
+ typename OutputIterator,
+ typename Comparator>
struct difference_func
{
typedef std::iterator_traits<InputIterator> traits_type;
@@ -157,44 +164,45 @@ namespace __gnu_parallel
inline OutputIterator
invoke(InputIterator a, InputIterator b, InputIterator c, InputIterator d,
- OutputIterator r) const
+ OutputIterator r) const
{
while (a != b && c != d)
- {
- if (comp(*a, *c))
- {
- *r = *a;
- ++a;
- ++r;
- }
- else if (comp(*c, *a))
- { ++c; }
- else
- {
- ++a;
- ++c;
- }
- }
+ {
+ if (comp(*a, *c))
+ {
+ *r = *a;
+ ++a;
+ ++r;
+ }
+ else if (comp(*c, *a))
+ { ++c; }
+ else
+ {
+ ++a;
+ ++c;
+ }
+ }
return std::copy(a, b, r);
}
inline difference_type
- count(InputIterator a, InputIterator b, InputIterator c, InputIterator d) const
+ count(InputIterator a, InputIterator b, InputIterator c, InputIterator d)
+ const
{
difference_type counter = 0;
while (a != b && c != d)
- {
- if (comp(*a, *c))
- {
- ++a;
- ++counter;
- }
- else if (comp(*c, *a))
- { ++c; }
- else
- { ++a; ++c; }
- }
+ {
+ if (comp(*a, *c))
+ {
+ ++a;
+ ++counter;
+ }
+ else if (comp(*c, *a))
+ { ++c; }
+ else
+ { ++a; ++c; }
+ }
return counter + (b - a);
}
@@ -209,7 +217,10 @@ namespace __gnu_parallel
};
- template<typename InputIterator, typename OutputIterator, typename Comparator>
+template<
+ typename InputIterator,
+ typename OutputIterator,
+ typename Comparator>
struct intersection_func
{
typedef std::iterator_traits<InputIterator> traits_type;
@@ -222,44 +233,45 @@ namespace __gnu_parallel
inline OutputIterator
invoke(InputIterator a, InputIterator b, InputIterator c, InputIterator d,
- OutputIterator r) const
+ OutputIterator r) const
{
while (a != b && c != d)
- {
- if (comp(*a, *c))
- { ++a; }
- else if (comp(*c, *a))
- { ++c; }
- else
- {
- *r = *a;
- ++a;
- ++c;
- ++r;
- }
- }
+ {
+ if (comp(*a, *c))
+ { ++a; }
+ else if (comp(*c, *a))
+ { ++c; }
+ else
+ {
+ *r = *a;
+ ++a;
+ ++c;
+ ++r;
+ }
+ }
return r;
}
inline difference_type
- count(InputIterator a, InputIterator b, InputIterator c, InputIterator d) const
+ count(InputIterator a, InputIterator b, InputIterator c, InputIterator d)
+ const
{
difference_type counter = 0;
while (a != b && c != d)
- {
- if (comp(*a, *c))
- { ++a; }
- else if (comp(*c, *a))
- { ++c; }
- else
- {
- ++a;
- ++c;
- ++counter;
- }
- }
+ {
+ if (comp(*a, *c))
+ { ++a; }
+ else if (comp(*c, *a))
+ { ++c; }
+ else
+ {
+ ++a;
+ ++c;
+ ++counter;
+ }
+ }
return counter;
}
@@ -273,10 +285,11 @@ namespace __gnu_parallel
{ return out; }
};
- template<class InputIterator, class OutputIterator, class Comparator>
+template<class InputIterator, class OutputIterator, class Comparator>
struct union_func
{
- typedef typename std::iterator_traits<InputIterator>::difference_type difference_type;
+ typedef typename std::iterator_traits<InputIterator>::difference_type
+ difference_type;
union_func(Comparator c) : comp(c) {}
@@ -284,50 +297,50 @@ namespace __gnu_parallel
inline OutputIterator
invoke(InputIterator a, const InputIterator b, InputIterator c,
- const InputIterator d, OutputIterator r) const
+ const InputIterator d, OutputIterator r) const
{
while (a != b && c != d)
- {
- if (comp(*a, *c))
- {
- *r = *a;
- ++a;
- }
- else if (comp(*c, *a))
- {
- *r = *c;
- ++c;
- }
- else
- {
- *r = *a;
- ++a;
- ++c;
- }
- ++r;
- }
+ {
+ if (comp(*a, *c))
+ {
+ *r = *a;
+ ++a;
+ }
+ else if (comp(*c, *a))
+ {
+ *r = *c;
+ ++c;
+ }
+ else
+ {
+ *r = *a;
+ ++a;
+ ++c;
+ }
+ ++r;
+ }
return std::copy(c, d, std::copy(a, b, r));
}
inline difference_type
- count(InputIterator a, const InputIterator b, InputIterator c,
- const InputIterator d) const
+ count(InputIterator a, InputIterator b, InputIterator c, InputIterator d)
+ const
{
difference_type counter = 0;
while (a != b && c != d)
- {
- if (comp(*a, *c))
- { ++a; }
- else if (comp(*c, *a))
- { ++c; }
- else
- {
- ++a;
- ++c;
- }
- ++counter;
- }
+ {
+ if (comp(*a, *c))
+ { ++a; }
+ else if (comp(*c, *a))
+ { ++c; }
+ else
+ {
+ ++a;
+ ++c;
+ }
+ ++counter;
+ }
counter += (b - a);
counter += (d - c);
@@ -343,11 +356,14 @@ namespace __gnu_parallel
{ return std::copy(a, b, out); }
};
- template<typename InputIterator, typename OutputIterator, typename Operation>
+template<
+ typename InputIterator,
+ typename OutputIterator,
+ typename Operation>
OutputIterator
parallel_set_operation(InputIterator begin1, InputIterator end1,
- InputIterator begin2, InputIterator end2,
- OutputIterator result, Operation op)
+ InputIterator begin2, InputIterator end2,
+ OutputIterator result, Operation op)
{
_GLIBCXX_CALL((end1 - begin1) + (end2 - begin2))
@@ -355,7 +371,6 @@ namespace __gnu_parallel
typedef typename traits_type::difference_type difference_type;
typedef typename std::pair<InputIterator, InputIterator> iterator_pair;
-
if (begin1 == end1)
return op.first_empty(begin2, end2, result);
@@ -364,152 +379,174 @@ namespace __gnu_parallel
const difference_type size = (end1 - begin1) + (end2 - begin2);
- thread_index_t num_threads = std::min<difference_type>(std::min(end1 - begin1, end2 - begin2), get_max_threads());
-
- difference_type borders[num_threads + 2];
- equally_split(size, num_threads + 1, borders);
-
- const iterator_pair sequence[ 2 ] = { std::make_pair(begin1, end1), std::make_pair(begin2, end2) } ;
-
- iterator_pair block_begins[num_threads + 1];
-
- // Very start.
- block_begins[0] = std::make_pair(begin1, begin2);
- difference_type length[num_threads];
-
+ const iterator_pair sequence[ 2 ] =
+ { std::make_pair(begin1, end1), std::make_pair(begin2, end2) } ;
OutputIterator return_value = result;
+ difference_type *borders;
+ iterator_pair *block_begins;
+ difference_type* lengths;
-#pragma omp parallel num_threads(num_threads)
- {
- // Result from multiseq_partition.
- InputIterator offset[2];
- const int iam = omp_get_thread_num();
-
- const difference_type rank = borders[iam + 1];
-
- multiseq_partition(sequence, sequence + 2, rank, offset, op.comp);
-
- // allowed to read?
- // together
- // *(offset[ 0 ] - 1) == *offset[ 1 ]
- if (offset[ 0 ] != begin1 && offset[ 1 ] != end2
- && !op.comp(*(offset[ 0 ] - 1), *offset[ 1 ])
- && !op.comp(*offset[ 1 ], *(offset[ 0 ] - 1)))
- {
- // Avoid split between globally equal elements: move one to
- // front in first sequence.
- --offset[ 0 ];
- }
-
- iterator_pair block_end = block_begins[ iam + 1 ] = iterator_pair(offset[ 0 ], offset[ 1 ]);
-
- // Make sure all threads have their block_begin result written out.
-#pragma omp barrier
-
- iterator_pair block_begin = block_begins[ iam ];
-
- // Begin working for the first block, while the others except
- // the last start to count.
- if (iam == 0)
- {
- // The first thread can copy already.
- length[ iam ] = op.invoke(block_begin.first, block_end.first, block_begin.second, block_end.second, result) - result;
- }
- else
- {
- length[ iam ] = op.count(block_begin.first, block_end.first,
- block_begin.second, block_end.second);
- }
-
- // Make sure everyone wrote their lengths.
-#pragma omp barrier
-
- OutputIterator r = result;
-
- if (iam == 0)
- {
- // Do the last block.
- for (int i = 0; i < num_threads; ++i)
- r += length[i];
-
- block_begin = block_begins[num_threads];
-
- // Return the result iterator of the last block.
- return_value = op.invoke(block_begin.first, end1, block_begin.second, end2, r);
-
- }
- else
- {
- for (int i = 0; i < iam; ++i)
- r += length[ i ];
-
- // Reset begins for copy pass.
- op.invoke(block_begin.first, block_end.first,
- block_begin.second, block_end.second, r);
- }
- }
+ thread_index_t num_threads =
+ std::min<difference_type>(get_max_threads(),
+ std::min(end1 - begin1, end2 - begin2));
+
+# pragma omp parallel num_threads(num_threads)
+ {
+# pragma omp single
+ {
+ num_threads = omp_get_num_threads();
+
+ borders = new difference_type[num_threads + 2];
+ equally_split(size, num_threads + 1, borders);
+ block_begins = new iterator_pair[num_threads + 1];
+ // Very start.
+ block_begins[0] = std::make_pair(begin1, begin2);
+ lengths = new difference_type[num_threads];
+ } //single
+
+ thread_index_t iam = omp_get_thread_num();
+
+ // Result from multiseq_partition.
+ InputIterator offset[2];
+ const difference_type rank = borders[iam + 1];
+
+ multiseq_partition(sequence, sequence + 2, rank, offset, op.comp);
+
+ // allowed to read?
+ // together
+ // *(offset[ 0 ] - 1) == *offset[ 1 ]
+ if (offset[ 0 ] != begin1 && offset[ 1 ] != end2
+ && !op.comp(*(offset[ 0 ] - 1), *offset[ 1 ])
+ && !op.comp(*offset[ 1 ], *(offset[ 0 ] - 1)))
+ {
+ // Avoid split between globally equal elements: move one to
+ // front in first sequence.
+ --offset[ 0 ];
+ }
+
+ iterator_pair block_end = block_begins[ iam + 1 ] =
+ iterator_pair(offset[ 0 ], offset[ 1 ]);
+
+ // Make sure all threads have their block_begin result written out.
+# pragma omp barrier
+
+ iterator_pair block_begin = block_begins[ iam ];
+
+ // Begin working for the first block, while the others except
+ // the last start to count.
+ if (iam == 0)
+ {
+ // The first thread can copy already.
+ lengths[ iam ] = op.invoke(block_begin.first, block_end.first,
+ block_begin.second, block_end.second,
+ result)
+ - result;
+ }
+ else
+ {
+ lengths[ iam ] = op.count(block_begin.first, block_end.first,
+ block_begin.second, block_end.second);
+ }
+
+ // Make sure everyone wrote their lengths.
+# pragma omp barrier
+
+ OutputIterator r = result;
+
+ if (iam == 0)
+ {
+ // Do the last block.
+ for (int i = 0; i < num_threads; ++i)
+ r += lengths[i];
+
+ block_begin = block_begins[num_threads];
+
+ // Return the result iterator of the last block.
+ return_value = op.invoke(
+ block_begin.first, end1, block_begin.second, end2, r);
+
+ }
+ else
+ {
+ for (int i = 0; i < iam; ++i)
+ r += lengths[ i ];
+
+ // Reset begins for copy pass.
+ op.invoke(block_begin.first, block_end.first,
+ block_begin.second, block_end.second, r);
+ }
+ }
return return_value;
}
- template<typename InputIterator, typename OutputIterator, typename Comparator>
+template<
+ typename InputIterator,
+ typename OutputIterator,
+ typename Comparator>
OutputIterator
parallel_set_union(InputIterator begin1, InputIterator end1,
- InputIterator begin2, InputIterator end2,
- OutputIterator result, Comparator comp)
+ InputIterator begin2, InputIterator end2,
+ OutputIterator result, Comparator comp)
{
return parallel_set_operation(begin1, end1, begin2, end2, result,
- union_func< InputIterator, OutputIterator, Comparator>(comp));
+ union_func< InputIterator, OutputIterator, Comparator>(comp));
}
- template<typename InputIterator, typename OutputIterator, typename Comparator>
+template<
+ typename InputIterator,
+ typename OutputIterator,
+ typename Comparator>
OutputIterator
parallel_set_intersection(InputIterator begin1, InputIterator end1,
- InputIterator begin2, InputIterator end2,
- OutputIterator result, Comparator comp)
+ InputIterator begin2, InputIterator end2,
+ OutputIterator result, Comparator comp)
{
return parallel_set_operation(begin1, end1, begin2, end2, result,
- intersection_func<InputIterator, OutputIterator, Comparator>(comp));
+ intersection_func<InputIterator, OutputIterator, Comparator>(comp));
}
- template<typename InputIterator, typename OutputIterator>
+template<typename InputIterator, typename OutputIterator>
OutputIterator
- set_intersection(InputIterator begin1, InputIterator end1, InputIterator begin2, InputIterator end2, OutputIterator result)
+ set_intersection(InputIterator begin1, InputIterator end1,
+ InputIterator begin2, InputIterator end2,
+ OutputIterator result)
{
typedef std::iterator_traits<InputIterator> traits_type;
typedef typename traits_type::value_type value_type;
return set_intersection(begin1, end1, begin2, end2, result,
- std::less<value_type>());
+ std::less<value_type>());
}
- template<typename InputIterator, typename OutputIterator, typename Comparator>
+template<
+ typename InputIterator,
+ typename OutputIterator,
+ typename Comparator>
OutputIterator
parallel_set_difference(InputIterator begin1, InputIterator end1,
- InputIterator begin2, InputIterator end2,
- OutputIterator result, Comparator comp)
+ InputIterator begin2, InputIterator end2,
+ OutputIterator result, Comparator comp)
{
return parallel_set_operation(begin1, end1, begin2, end2, result,
- difference_func<InputIterator, OutputIterator, Comparator>(comp));
+ difference_func<InputIterator, OutputIterator, Comparator>(comp));
}
- template<typename InputIterator, typename OutputIterator, typename Comparator>
+template<
+ typename InputIterator,
+ typename OutputIterator,
+ typename Comparator>
OutputIterator
- parallel_set_symmetric_difference(InputIterator begin1, InputIterator end1, InputIterator begin2, InputIterator end2, OutputIterator result, Comparator comp)
+ parallel_set_symmetric_difference(InputIterator begin1, InputIterator end1,
+ InputIterator begin2, InputIterator end2,
+ OutputIterator result, Comparator comp)
{
return parallel_set_operation(begin1, end1, begin2, end2, result,
- symmetric_difference_func<InputIterator, OutputIterator, Comparator>(comp));
+ symmetric_difference_func<InputIterator, OutputIterator, Comparator>
+ (comp));
}
}
#endif // _GLIBCXX_SET_ALGORITHM_
-
-
-
-
-
-
-
-
diff --git a/libstdc++-v3/include/parallel/unique_copy.h b/libstdc++-v3/include/parallel/unique_copy.h
index 93a0304..bf30503 100644
--- a/libstdc++-v3/include/parallel/unique_copy.h
+++ b/libstdc++-v3/include/parallel/unique_copy.h
@@ -44,16 +44,19 @@
namespace __gnu_parallel
{
- /** @brief Parallel std::unique_copy(), without explicit equality predicate.
- * @param first Begin iterator of input sequence.
- * @param last End iterator of input sequence.
- * @param result Begin iterator of result sequence.
- * @param binary_pred Equality predicate.
- * @return End iterator of result sequence. */
- template<typename InputIterator, class OutputIterator, class BinaryPredicate>
+/** @brief Parallel std::unique_copy(), w/o explicit equality predicate.
+ * @param first Begin iterator of input sequence.
+ * @param last End iterator of input sequence.
+ * @param result Begin iterator of result sequence.
+ * @param binary_pred Equality predicate.
+ * @return End iterator of result sequence. */
+template<
+ typename InputIterator,
+ class OutputIterator,
+ class BinaryPredicate>
inline OutputIterator
parallel_unique_copy(InputIterator first, InputIterator last,
- OutputIterator result, BinaryPredicate binary_pred)
+ OutputIterator result, BinaryPredicate binary_pred)
{
_GLIBCXX_CALL(last - first)
@@ -62,126 +65,136 @@ namespace __gnu_parallel
typedef typename traits_type::difference_type difference_type;
difference_type size = last - first;
- int num_threads = __gnu_parallel::get_max_threads();
- difference_type counter[num_threads + 1];
if (size == 0)
return result;
// Let the first thread process two parts.
- difference_type borders[num_threads + 2];
- __gnu_parallel::equally_split(size, num_threads + 1, borders);
+ difference_type *counter;
+ difference_type *borders;
+ thread_index_t num_threads = get_max_threads();
// First part contains at least one element.
-#pragma omp parallel num_threads(num_threads)
- {
- int iam = omp_get_thread_num();
-
- difference_type begin, end;
-
- // Check for length without duplicates
- // Needed for position in output
- difference_type i = 0;
- OutputIterator out = result;
- if (iam == 0)
- {
- begin = borders[0] + 1; // == 1
- end = borders[iam + 1];
-
- i++;
- new (static_cast<void *>(&*out)) value_type(*first);
- out++;
-
- for (InputIterator iter = first + begin; iter < first + end; ++iter)
- {
- if (!binary_pred(*iter, *(iter-1)))
- {
- i++;
- new (static_cast<void *>(&*out)) value_type(*iter);
- out++;
- }
- }
- }
+# pragma omp parallel num_threads(num_threads)
+ {
+# pragma omp single
+ {
+ num_threads = omp_get_num_threads();
+ borders = new difference_type[num_threads + 2];
+ equally_split(size, num_threads + 1, borders);
+ counter = new difference_type[num_threads + 1];
+ }
+
+ thread_index_t iam = omp_get_thread_num();
+
+ difference_type begin, end;
+
+ // Check for length without duplicates
+ // Needed for position in output
+ difference_type i = 0;
+ OutputIterator out = result;
+
+ if (iam == 0)
+ {
+ begin = borders[0] + 1; // == 1
+ end = borders[iam + 1];
+
+ i++;
+ new (static_cast<void *>(&*out)) value_type(*first);
+ out++;
+
+ for (InputIterator iter = first + begin; iter < first + end; ++iter)
+ {
+ if (!binary_pred(*iter, *(iter-1)))
+ {
+ i++;
+ new (static_cast<void *>(&*out)) value_type(*iter);
+ out++;
+ }
+ }
+ }
else
- {
- begin = borders[iam]; //one part
- end = borders[iam + 1];
-
- for (InputIterator iter = first + begin; iter < first + end; ++iter)
- {
- if (!binary_pred(*iter, *(iter-1)))
- {
- i++;
- }
- }
- }
+ {
+ begin = borders[iam]; //one part
+ end = borders[iam + 1];
+
+ for (InputIterator iter = first + begin; iter < first + end; ++iter)
+ {
+ if (!binary_pred(*iter, *(iter-1)))
+ {
+ i++;
+ }
+ }
+ }
counter[iam] = i;
// Last part still untouched.
difference_type begin_output;
-#pragma omp barrier
+# pragma omp barrier
// Store result in output on calculated positions.
begin_output = 0;
if (iam == 0)
- {
- for (int t = 0; t < num_threads; t++)
- begin_output += counter[t];
+ {
+ for (int t = 0; t < num_threads; t++)
+ begin_output += counter[t];
- i = 0;
+ i = 0;
- OutputIterator iter_out = result + begin_output;
+ OutputIterator iter_out = result + begin_output;
- begin = borders[num_threads];
- end = size;
+ begin = borders[num_threads];
+ end = size;
- for (InputIterator iter = first + begin; iter < first + end; ++iter)
- {
- if (iter == first || !binary_pred(*iter, *(iter-1)))
- {
- i++;
- new (static_cast<void *>(&*iter_out)) value_type(*iter);
- iter_out++;
- }
- }
+ for (InputIterator iter = first + begin; iter < first + end; ++iter)
+ {
+ if (iter == first || !binary_pred(*iter, *(iter-1)))
+ {
+ i++;
+ new (static_cast<void *>(&*iter_out)) value_type(*iter);
+ iter_out++;
+ }
+ }
- counter[num_threads] = i;
- }
+ counter[num_threads] = i;
+ }
else
- {
- for (int t = 0; t < iam; t++)
- begin_output += counter[t];
-
- OutputIterator iter_out = result + begin_output;
- for (InputIterator iter = first + begin; iter < first + end; ++iter)
- {
- if (!binary_pred(*iter, *(iter-1)))
- {
- new (static_cast<void *> (&*iter_out)) value_type(*iter);
- iter_out++;
- }
- }
- }
+ {
+ for (int t = 0; t < iam; t++)
+ begin_output += counter[t];
+
+ OutputIterator iter_out = result + begin_output;
+ for (InputIterator iter = first + begin; iter < first + end; ++iter)
+ {
+ if (!binary_pred(*iter, *(iter-1)))
+ {
+ new (static_cast<void *> (&*iter_out)) value_type(*iter);
+ iter_out++;
+ }
+ }
+ }
}
difference_type end_output = 0;
for (int t = 0; t < num_threads + 1; t++)
end_output += counter[t];
+ delete[] borders;
+
return result + end_output;
}
- /** @brief Parallel std::unique_copy(), without explicit equality predicate
- * @param first Begin iterator of input sequence.
- * @param last End iterator of input sequence.
- * @param result Begin iterator of result sequence.
- * @return End iterator of result sequence. */
- template<typename InputIterator, class OutputIterator>
+/** @brief Parallel std::unique_copy(), without explicit equality predicate
+ * @param first Begin iterator of input sequence.
+ * @param last End iterator of input sequence.
+ * @param result Begin iterator of result sequence.
+ * @return End iterator of result sequence. */
+template<typename InputIterator, class OutputIterator>
inline OutputIterator
parallel_unique_copy(InputIterator first, InputIterator last,
- OutputIterator result)
+ OutputIterator result)
{
typedef typename std::iterator_traits<InputIterator>::value_type value_type;
diff --git a/libstdc++-v3/include/parallel/workstealing.h b/libstdc++-v3/include/parallel/workstealing.h
index 7704245..60c8739 100644
--- a/libstdc++-v3/include/parallel/workstealing.h
+++ b/libstdc++-v3/include/parallel/workstealing.h
@@ -55,8 +55,8 @@ namespace __gnu_parallel
#define _GLIBCXX_JOB_VOLATILE volatile
- /** @brief One job for a certain thread. */
- template<typename _DifferenceTp>
+/** @brief One job for a certain thread. */
+template<typename _DifferenceTp>
struct Job
{
typedef _DifferenceTp difference_type;
@@ -78,31 +78,38 @@ namespace __gnu_parallel
_GLIBCXX_JOB_VOLATILE difference_type load;
};
- /** @brief Work stealing algorithm for random access iterators.
- *
- * Uses O(1) additional memory. Synchronization at job lists is
- * done with atomic operations.
- * @param begin Begin iterator of element sequence.
- * @param end End iterator of element sequence.
- * @param op User-supplied functor (comparator, predicate, adding
- * functor, ...).
- * @param f Functor to "process" an element with op (depends on
- * desired functionality, e. g. for std::for_each(), ...).
- * @param r Functor to "add" a single result to the already
- * processed elements (depends on functionality).
- * @param base Base value for reduction.
- * @param output Pointer to position where final result is written to
- * @param bound Maximum number of elements processed (e. g. for
- * std::count_n()).
- * @return User-supplied functor (that may contain a part of the result).
- */
- template<typename RandomAccessIterator, typename Op, typename Fu, typename Red, typename Result>
+/** @brief Work stealing algorithm for random access iterators.
+ *
+ * Uses O(1) additional memory. Synchronization at job lists is
+ * done with atomic operations.
+ * @param begin Begin iterator of element sequence.
+ * @param end End iterator of element sequence.
+ * @param op User-supplied functor (comparator, predicate, adding
+ * functor, ...).
+ * @param f Functor to "process" an element with op (depends on
+ * desired functionality, e. g. for std::for_each(), ...).
+ * @param r Functor to "add" a single result to the already
+ * processed elements (depends on functionality).
+ * @param base Base value for reduction.
+ * @param output Pointer to position where final result is written to
+ * @param bound Maximum number of elements processed (e. g. for
+ * std::count_n()).
+ * @return User-supplied functor (that may contain a part of the result).
+ */
+template<
+ typename RandomAccessIterator,
+ typename Op,
+ typename Fu,
+ typename Red,
+ typename Result>
Op
- for_each_template_random_access_workstealing(RandomAccessIterator begin,
- RandomAccessIterator end,
- Op op, Fu& f, Red r,
- Result base, Result& output,
- typename std::iterator_traits<RandomAccessIterator>::difference_type bound)
+ for_each_template_random_access_workstealing(
+ RandomAccessIterator begin,
+ RandomAccessIterator end,
+ Op op, Fu& f, Red r,
+ Result base, Result& output,
+ typename std::iterator_traits<RandomAccessIterator>::difference_type
+ bound)
{
_GLIBCXX_CALL(end - begin)
@@ -110,182 +117,187 @@ namespace __gnu_parallel
typedef typename traits_type::difference_type difference_type;
- difference_type chunk_size = static_cast<difference_type>(Settings::workstealing_chunk_size);
+ difference_type chunk_size =
+ static_cast<difference_type>(Settings::workstealing_chunk_size);
// How many jobs?
difference_type length = (bound < 0) ? (end - begin) : bound;
// To avoid false sharing in a cache line.
- const int stride = Settings::cache_line_size * 10 / sizeof(Job<difference_type>) + 1;
+ const int stride =
+ Settings::cache_line_size * 10 / sizeof(Job<difference_type>) + 1;
// Total number of threads currently working.
thread_index_t busy = 0;
- thread_index_t num_threads = get_max_threads();
- difference_type num_threads_min = num_threads < end - begin ? num_threads : end - begin;
+
+ Job<difference_type> *job;
omp_lock_t output_lock;
omp_init_lock(&output_lock);
- // No more threads than jobs, at least one thread.
- difference_type num_threads_max = num_threads_min > 1 ? num_threads_min : 1;
- num_threads = static_cast<thread_index_t>(num_threads_max);
-
- // Create job description array.
- Job<difference_type> *job = new Job<difference_type>[num_threads * stride];
-
// Write base value to output.
output = base;
-#pragma omp parallel shared(busy) num_threads(num_threads)
- {
- // Initialization phase.
-
- // Flags for every thread if it is doing productive work.
- bool iam_working = false;
-
- // Thread id.
- thread_index_t iam = omp_get_thread_num();
-
- // This job.
- Job<difference_type>& my_job = job[iam * stride];
-
- // Random number (for work stealing).
- thread_index_t victim;
-
- // Local value for reduction.
- Result result = Result();
-
- // Number of elements to steal in one attempt.
- difference_type steal;
-
- // Every thread has its own random number generator (modulo num_threads).
- random_number rand_gen(iam, num_threads);
-
-#pragma omp atomic
- // This thread is currently working.
- busy++;
-
- iam_working = true;
-
- // How many jobs per thread? last thread gets the rest.
- my_job.first = static_cast<difference_type>(iam * (length / num_threads));
-
- my_job.last = (iam == (num_threads - 1)) ? (length - 1) : ((iam + 1) * (length / num_threads) - 1);
- my_job.load = my_job.last - my_job.first + 1;
-
- // Init result with first value (to have a base value for reduction).
- if (my_job.first <= my_job.last)
- {
- // Cannot use volatile variable directly.
- difference_type my_first = my_job.first;
- result = f(op, begin + my_first);
- my_job.first++;
- my_job.load--;
- }
-
- RandomAccessIterator current;
-
-#pragma omp barrier
-
- // Actual work phase
- // Work on own or stolen start
- while (busy > 0)
- {
- // Work until no productive thread left.
-#pragma omp flush(busy)
-
- // Thread has own work to do
- while (my_job.first <= my_job.last)
- {
- // fetch-and-add call
- // Reserve current job block (size chunk_size) in my queue.
- difference_type current_job = fetch_and_add<difference_type>(&(my_job.first), chunk_size);
-
- // Update load, to make the three values consistent,
- // first might have been changed in the meantime
- my_job.load = my_job.last - my_job.first + 1;
- for (difference_type job_counter = 0; job_counter < chunk_size && current_job <= my_job.last; job_counter++)
- {
- // Yes: process it!
- current = begin + current_job;
- current_job++;
-
- // Do actual work.
- result = r(result, f(op, current));
- }
-
-#pragma omp flush(busy)
-
- }
-
- // After reaching this point, a thread's job list is empty.
- if (iam_working)
- {
-#pragma omp atomic
- // This thread no longer has work.
- busy--;
-
- iam_working = false;
- }
-
- difference_type supposed_first, supposed_last, supposed_load;
- do
- {
- // Find random nonempty deque (not own) and do consistency check.
- yield();
-#pragma omp flush(busy)
- victim = rand_gen();
- supposed_first = job[victim * stride].first;
- supposed_last = job[victim * stride].last;
- supposed_load = job[victim * stride].load;
- }
- while (busy > 0
- && ((supposed_load <= 0) || ((supposed_first + supposed_load - 1) != supposed_last)));
-
- if (busy == 0)
- break;
-
- if (supposed_load > 0)
- {
- // Has work and work to do.
- // Number of elements to steal (at least one).
- steal = (supposed_load < 2) ? 1 : supposed_load / 2;
-
- // Protects against stealing threads
- // omp_set_lock(&(job[victim * stride].lock));
-
- // Push victim's start forward.
- difference_type stolen_first = fetch_and_add<difference_type>(&(job[victim * stride].first), steal);
- difference_type stolen_try = stolen_first + steal - difference_type(1);
-
- // Protects against working thread
- // omp_unset_lock(&(job[victim * stride].lock));
-
- my_job.first = stolen_first;
-
- // Avoid std::min dependencies.
- my_job.last = stolen_try < supposed_last ? stolen_try : supposed_last;
-
- my_job.load = my_job.last - my_job.first + 1;
-
- //omp_unset_lock(&(my_job.lock));
-
-#pragma omp atomic
- // Has potential work again.
- busy++;
- iam_working = true;
-
-#pragma omp flush(busy)
- }
-#pragma omp flush(busy)
- } // end while busy > 0
- // Add accumulated result to output.
- omp_set_lock(&output_lock);
- output = r(output, result);
- omp_unset_lock(&output_lock);
-
- //omp_destroy_lock(&(my_job.lock));
- }
+ // No more threads than jobs, at least one thread.
+ thread_index_t num_threads =
+ __gnu_parallel::max<thread_index_t>(1,
+ __gnu_parallel::min<difference_type>(length, get_max_threads()));
+
+# pragma omp parallel shared(busy) num_threads(num_threads)
+ {
+
+# pragma omp single
+ {
+ num_threads = omp_get_num_threads();
+
+ // Create job description array.
+ job = new Job<difference_type>[num_threads * stride];
+ }
+
+ // Initialization phase.
+
+ // Flags for every thread if it is doing productive work.
+ bool iam_working = false;
+
+ // Thread id.
+ thread_index_t iam = omp_get_thread_num();
+
+ // This job.
+ Job<difference_type>& my_job = job[iam * stride];
+
+ // Random number (for work stealing).
+ thread_index_t victim;
+
+ // Local value for reduction.
+ Result result = Result();
+
+ // Number of elements to steal in one attempt.
+ difference_type steal;
+
+ // Every thread has its own random number generator
+ // (modulo num_threads).
+ random_number rand_gen(iam, num_threads);
+
+ // This thread is currently working.
+# pragma omp atomic
+ busy++;
+
+ iam_working = true;
+
+ // How many jobs per thread? last thread gets the rest.
+ my_job.first =
+ static_cast<difference_type>(iam * (length / num_threads));
+
+ my_job.last = (iam == (num_threads - 1)) ?
+ (length - 1) : ((iam + 1) * (length / num_threads) - 1);
+ my_job.load = my_job.last - my_job.first + 1;
+
+ // Init result with first value (to have a base value for reduction).
+ if (my_job.first <= my_job.last)
+ {
+ // Cannot use volatile variable directly.
+ difference_type my_first = my_job.first;
+ result = f(op, begin + my_first);
+ my_job.first++;
+ my_job.load--;
+ }
+
+ RandomAccessIterator current;
+
+# pragma omp barrier
+
+ // Actual work phase
+ // Work on own or stolen start
+ while (busy > 0)
+ {
+ // Work until no productive thread left.
+# pragma omp flush(busy)
+
+ // Thread has own work to do
+ while (my_job.first <= my_job.last)
+ {
+ // fetch-and-add call
+ // Reserve current job block (size chunk_size) in my queue.
+ difference_type current_job =
+ fetch_and_add<difference_type>(&(my_job.first), chunk_size);
+
+ // Update load, to make the three values consistent,
+ // first might have been changed in the meantime
+ my_job.load = my_job.last - my_job.first + 1;
+ for (difference_type job_counter = 0;
+ job_counter < chunk_size && current_job <= my_job.last;
+ job_counter++)
+ {
+ // Yes: process it!
+ current = begin + current_job;
+ current_job++;
+
+ // Do actual work.
+ result = r(result, f(op, current));
+ }
+
+# pragma omp flush(busy)
+ }
+
+ // After reaching this point, a thread's job list is empty.
+ if (iam_working)
+ {
+ // This thread no longer has work.
+# pragma omp atomic
+ busy--;
+
+ iam_working = false;
+ }
+
+ difference_type supposed_first, supposed_last, supposed_load;
+ do
+ {
+ // Find random nonempty deque (not own), do consistency check.
+ yield();
+# pragma omp flush(busy)
+ victim = rand_gen();
+ supposed_first = job[victim * stride].first;
+ supposed_last = job[victim * stride].last;
+ supposed_load = job[victim * stride].load;
+ }
+ while (busy > 0
+ && ((supposed_load <= 0)
+ || ((supposed_first + supposed_load - 1) != supposed_last)));
+
+ if (busy == 0)
+ break;
+
+ if (supposed_load > 0)
+ {
+ // Has work and work to do.
+ // Number of elements to steal (at least one).
+ steal = (supposed_load < 2) ? 1 : supposed_load / 2;
+
+ // Push victim's start forward.
+ difference_type stolen_first =
+ fetch_and_add<difference_type>(
+ &(job[victim * stride].first), steal);
+ difference_type stolen_try =
+ stolen_first + steal - difference_type(1);
+
+ my_job.first = stolen_first;
+ my_job.last = __gnu_parallel::min(stolen_try, supposed_last);
+ my_job.load = my_job.last - my_job.first + 1;
+
+ // Has potential work again.
+# pragma omp atomic
+ busy++;
+ iam_working = true;
+
+# pragma omp flush(busy)
+ }
+# pragma omp flush(busy)
+ } // end while busy > 0
+ // Add accumulated result to output.
+ omp_set_lock(&output_lock);
+ output = r(output, result);
+ omp_unset_lock(&output_lock);
+ }
delete[] job;