diff options
Diffstat (limited to 'gdbsupport/parallel-for.h')
-rw-r--r-- | gdbsupport/parallel-for.h | 223 |
1 files changed, 101 insertions, 122 deletions
diff --git a/gdbsupport/parallel-for.h b/gdbsupport/parallel-for.h index de9ebb1..fc4c30d 100644 --- a/gdbsupport/parallel-for.h +++ b/gdbsupport/parallel-for.h @@ -38,11 +38,7 @@ template<typename T> struct par_for_accumulator { public: - - explicit par_for_accumulator (size_t n_threads) - : m_futures (n_threads) - { - } + explicit par_for_accumulator (size_t n_threads) : m_futures (n_threads) {} /* The result type that is accumulated. */ typedef std::vector<T> result_type; @@ -71,13 +67,9 @@ public: } /* Resize the results to N. */ - void resize (size_t n) - { - m_futures.resize (n); - } + void resize (size_t n) { m_futures.resize (n); } private: - /* A vector of futures coming from the tasks run in the background. */ std::vector<gdb::future<T>> m_futures; @@ -88,11 +80,7 @@ template<> struct par_for_accumulator<void> { public: - - explicit par_for_accumulator (size_t n_threads) - : m_futures (n_threads) - { - } + explicit par_for_accumulator (size_t n_threads) : m_futures (n_threads) {} /* This specialization does not compute results. */ typedef void result_type; @@ -109,23 +97,19 @@ public: for (auto &future : m_futures) { - /* Use 'get' and not 'wait', to propagate any exception. */ - future.get (); + /* Use 'get' and not 'wait', to propagate any exception. */ + future.get (); } } /* Resize the results to N. */ - void resize (size_t n) - { - m_futures.resize (n); - } + void resize (size_t n) { m_futures.resize (n); } private: - std::vector<gdb::future<void>> m_futures; }; -} +} // namespace detail /* A very simple "parallel for". This splits the range of iterators into subranges, and then passes each subrange to the callback. The @@ -144,15 +128,14 @@ private: the number of threads that were used. */ template<class RandomIt, class RangeFunction> -typename gdb::detail::par_for_accumulator< - typename gdb::invoke_result<RangeFunction, RandomIt, RandomIt>::type - >::result_type +typename gdb::detail::par_for_accumulator<typename gdb::invoke_result< + RangeFunction, RandomIt, RandomIt>::type>::result_type parallel_for_each (unsigned n, RandomIt first, RandomIt last, - RangeFunction callback, - gdb::function_view<size_t(RandomIt)> task_size = nullptr) + RangeFunction callback, + gdb::function_view<size_t (RandomIt)> task_size = nullptr) { - using result_type - = typename gdb::invoke_result<RangeFunction, RandomIt, RandomIt>::type; + using result_type = + typename gdb::invoke_result<RangeFunction, RandomIt, RandomIt>::type; /* If enabled, print debug info about how the work is distributed across the threads. */ @@ -170,34 +153,34 @@ parallel_for_each (unsigned n, RandomIt first, RandomIt last, if (n_threads > 1) { if (task_size != nullptr) - { - gdb_assert (n == 1); - for (RandomIt i = first; i != last; ++i) - { - size_t element_size = task_size (i); - gdb_assert (element_size > 0); - if (element_size > max_element_size) - /* We could start scaling here, but that doesn't seem to be + { + gdb_assert (n == 1); + for (RandomIt i = first; i != last; ++i) + { + size_t element_size = task_size (i); + gdb_assert (element_size > 0); + if (element_size > max_element_size) + /* We could start scaling here, but that doesn't seem to be worth the effort. */ - element_size = max_element_size; - size_t prev_total_size = total_size; - total_size += element_size; - /* Check for overflow. */ - gdb_assert (prev_total_size < total_size); - } - size_per_thread = total_size / n_threads; - } + element_size = max_element_size; + size_t prev_total_size = total_size; + total_size += element_size; + /* Check for overflow. */ + gdb_assert (prev_total_size < total_size); + } + size_per_thread = total_size / n_threads; + } else - { - /* Require that there should be at least N elements in a + { + /* Require that there should be at least N elements in a thread. */ - gdb_assert (n > 0); - if (n_elements / n_threads < n) - n_threads = std::max (n_elements / n, (size_t) 1); - elts_per_thread = n_elements / n_threads; - elts_left_over = n_elements % n_threads; - /* n_elements == n_threads * elts_per_thread + elts_left_over. */ - } + gdb_assert (n > 0); + if (n_elements / n_threads < n) + n_threads = std::max (n_elements / n, (size_t) 1); + elts_per_thread = n_elements / n_threads; + elts_left_over = n_elements % n_threads; + /* n_elements == n_threads * elts_per_thread + elts_left_over. */ + } } size_t count = n_threads == 0 ? 0 : n_threads - 1; @@ -205,17 +188,20 @@ parallel_for_each (unsigned n, RandomIt first, RandomIt last, if (parallel_for_each_debug) { - debug_printf (_("Parallel for: n_elements: %zu\n"), n_elements); + debug_printf (_ ("Parallel for: n_elements: %zu\n"), n_elements); if (task_size != nullptr) - { - debug_printf (_("Parallel for: total_size: %zu\n"), total_size); - debug_printf (_("Parallel for: size_per_thread: %zu\n"), size_per_thread); - } + { + debug_printf (_ ("Parallel for: total_size: %zu\n"), total_size); + debug_printf (_ ("Parallel for: size_per_thread: %zu\n"), + size_per_thread); + } else - { - debug_printf (_("Parallel for: minimum elements per thread: %u\n"), n); - debug_printf (_("Parallel for: elts_per_thread: %zu\n"), elts_per_thread); - } + { + debug_printf (_ ("Parallel for: minimum elements per thread: %u\n"), + n); + debug_printf (_ ("Parallel for: elts_per_thread: %zu\n"), + elts_per_thread); + } } size_t remaining_size = total_size; @@ -224,81 +210,77 @@ parallel_for_each (unsigned n, RandomIt first, RandomIt last, RandomIt end; size_t chunk_size = 0; if (task_size == nullptr) - { - end = first + elts_per_thread; - if (i < elts_left_over) - /* Distribute the leftovers over the worker threads, to avoid having + { + end = first + elts_per_thread; + if (i < elts_left_over) + /* Distribute the leftovers over the worker threads, to avoid having to handle all of them in a single thread. */ - end++; - } + end++; + } else - { - RandomIt j; - for (j = first; j < last && chunk_size < size_per_thread; ++j) - { - size_t element_size = task_size (j); - if (element_size > max_element_size) - element_size = max_element_size; - chunk_size += element_size; - } - end = j; - remaining_size -= chunk_size; - } + { + RandomIt j; + for (j = first; j < last && chunk_size < size_per_thread; ++j) + { + size_t element_size = task_size (j); + if (element_size > max_element_size) + element_size = max_element_size; + chunk_size += element_size; + } + end = j; + remaining_size -= chunk_size; + } /* This case means we don't have enough elements to really distribute them. Rather than ever submit a task that does nothing, we short-circuit here. */ if (first == end) - end = last; + end = last; if (end == last) - { - /* We're about to dispatch the last batch of elements, which + { + /* We're about to dispatch the last batch of elements, which we normally process in the main thread. So just truncate the result list here. This avoids submitting empty tasks to the thread pool. */ - count = i; - results.resize (count); - break; - } + count = i; + results.resize (count); + break; + } if (parallel_for_each_debug) - { - debug_printf (_("Parallel for: elements on worker thread %i\t: %zu"), - i, (size_t)(end - first)); - if (task_size != nullptr) - debug_printf (_("\t(size: %zu)"), chunk_size); - debug_printf (_("\n")); - } - results.post (i, [=] () { - return callback (first, end); - }); + debug_printf (_ ( + "Parallel for: elements on worker thread %i\t: %zu"), + i, (size_t) (end - first)); + if (task_size != nullptr) + debug_printf (_ ("\t(size: %zu)"), chunk_size); + debug_printf (_ ("\n")); + } + results.post (i, [=] () { return callback (first, end); }); first = end; } for (int i = count; i < n_worker_threads; ++i) if (parallel_for_each_debug) { - debug_printf (_("Parallel for: elements on worker thread %i\t: 0"), i); - if (task_size != nullptr) - debug_printf (_("\t(size: 0)")); - debug_printf (_("\n")); + debug_printf (_ ("Parallel for: elements on worker thread %i\t: 0"), + i); + if (task_size != nullptr) + debug_printf (_ ("\t(size: 0)")); + debug_printf (_ ("\n")); } /* Process all the remaining elements in the main thread. */ if (parallel_for_each_debug) { - debug_printf (_("Parallel for: elements on main thread\t\t: %zu"), - (size_t)(last - first)); + debug_printf (_ ("Parallel for: elements on main thread\t\t: %zu"), + (size_t) (last - first)); if (task_size != nullptr) - debug_printf (_("\t(size: %zu)"), remaining_size); - debug_printf (_("\n")); + debug_printf (_ ("\t(size: %zu)"), remaining_size); + debug_printf (_ ("\n")); } - return results.finish ([=] () - { - return callback (first, last); - }); + return results.finish ([=] () { return callback (first, last); }); } /* A sequential drop-in replacement of parallel_for_each. This can be useful @@ -306,24 +288,21 @@ parallel_for_each (unsigned n, RandomIt first, RandomIt last, multi-threading in a fine-grained way. */ template<class RandomIt, class RangeFunction> -typename gdb::detail::par_for_accumulator< - typename gdb::invoke_result<RangeFunction, RandomIt, RandomIt>::type - >::result_type +typename gdb::detail::par_for_accumulator<typename gdb::invoke_result< + RangeFunction, RandomIt, RandomIt>::type>::result_type sequential_for_each (unsigned n, RandomIt first, RandomIt last, - RangeFunction callback, - gdb::function_view<size_t(RandomIt)> task_size = nullptr) + RangeFunction callback, + gdb::function_view<size_t (RandomIt)> task_size = nullptr) { - using result_type = typename gdb::invoke_result<RangeFunction, RandomIt, RandomIt>::type; + using result_type = + typename gdb::invoke_result<RangeFunction, RandomIt, RandomIt>::type; gdb::detail::par_for_accumulator<result_type> results (0); /* Process all the remaining elements in the main thread. */ - return results.finish ([=] () - { - return callback (first, last); - }); + return results.finish ([=] () { return callback (first, last); }); } -} +} // namespace gdb #endif /* GDBSUPPORT_PARALLEL_FOR_H */ |