diff options
author | Dvorskiy, Mikhail <mikhail.dvorskiy@intel.com> | 2020-09-14 14:20:32 +0300 |
---|---|---|
committer | Dvorskiy, Mikhail <mikhail.dvorskiy@intel.com> | 2020-09-14 14:21:54 +0300 |
commit | 0b2e0e80d963f3498705a38e8d02eafe541ca9d6 (patch) | |
tree | 1dd50622a8834fb2fd36b6b82d17d577ada83a20 /pstl | |
parent | 5cac85c931d95f3c94f79837a3bf406eb68edaeb (diff) | |
download | llvm-0b2e0e80d963f3498705a38e8d02eafe541ca9d6.zip llvm-0b2e0e80d963f3498705a38e8d02eafe541ca9d6.tar.gz llvm-0b2e0e80d963f3498705a38e8d02eafe541ca9d6.tar.bz2 |
[pstl] Support Threading Building Blocks 2020 (oneTBB) for "tbb" parallel backend.
After these changes, the "tbb" parallel backend works with both old TBB versions (TBB_INTERFACE_VERSION <= 12000) and new ones (TBB 2020 and greater).
More about oneTBB:
https://github.com/oneapi-src/oneTBB
Phabricator Review:
https://reviews.llvm.org/D87380
Diffstat (limited to 'pstl')
-rw-r--r-- | pstl/include/pstl/internal/parallel_backend_tbb.h | 448 |
1 files changed, 369 insertions, 79 deletions
diff --git a/pstl/include/pstl/internal/parallel_backend_tbb.h b/pstl/include/pstl/internal/parallel_backend_tbb.h index a9ea0c7..f1836aa 100644 --- a/pstl/include/pstl/internal/parallel_backend_tbb.h +++ b/pstl/include/pstl/internal/parallel_backend_tbb.h @@ -25,6 +25,7 @@ #include <tbb/parallel_invoke.h> #include <tbb/task_arena.h> #include <tbb/tbb_allocator.h> +#include <tbb/task.h> #if TBB_INTERFACE_VERSION < 10000 # error Intel(R) Threading Building Blocks 2018 is required; older versions are not supported. @@ -71,7 +72,11 @@ class __buffer inline void __cancel_execution() { +#if TBB_INTERFACE_VERSION <= 12000 tbb::task::self().group()->cancel_group_execution(); +#else + tbb::task::current_context()->cancel_group_execution(); +#endif } //------------------------------------------------------------------------ @@ -413,17 +418,308 @@ __parallel_transform_scan(_ExecutionPolicy&&, _Index __n, _Up __u, _Tp __init, _ //------------------------------------------------------------------------ #define _PSTL_MERGE_CUT_OFF 2000 +template <typename _Func> +class __func_task; +template <typename _Func> +class __root_task; + +#if TBB_INTERFACE_VERSION <= 12000 +class __task : public tbb::task +{ + public: + template <typename _Fn> + __task* + make_continuation(_Fn&& __f) + { + return new (allocate_continuation()) __func_task<typename std::decay<_Fn>::type>(std::forward<_Fn>(__f)); + } + + template <typename _Fn> + __task* + make_child_of(__task* parent, _Fn&& __f) + { + return new (parent->allocate_child()) __func_task<typename std::decay<_Fn>::type>(std::forward<_Fn>(__f)); + } + + template <typename _Fn> + __task* + make_additional_child_of(tbb::task* parent, _Fn&& __f) + { + return new (tbb::task::allocate_additional_child_of(*parent)) + __func_task<typename std::decay<_Fn>::type>(std::forward<_Fn>(__f)); + } + + inline void + recycle_as_continuation() + { + tbb::task::recycle_as_continuation(); + } + + inline void + recycle_as_child_of(__task* parent) + { + 
tbb::task::recycle_as_child_of(*parent); + } + + inline void + spawn(__task* __t) + { + tbb::task::spawn(*__t); + } + + template <typename _Fn> + static inline void + spawn_root_and_wait(__root_task<_Fn>& __root) + { + tbb::task::spawn_root_and_wait(*__root._M_task); + } +}; + +template <typename _Func> +class __func_task : public __task +{ + _Func _M_func; + + tbb::task* + execute() + { + return _M_func(this); + }; + + public: + template <typename _Fn> + __func_task(_Fn&& __f) : _M_func{std::forward<_Fn>(__f)} + { + } + + _Func& + body() + { + return _M_func; + } +}; + +template <typename _Func> +class __root_task +{ + tbb::task* _M_task; + + public: + template <typename... Args> + __root_task(Args&&... args) + : _M_task{new (tbb::task::allocate_root()) __func_task<_Func>{_Func(std::forward<Args>(args)...)}} + { + } + + friend class __task; + friend class __func_task<_Func>; +}; + +#else // TBB_INTERFACE_VERSION <= 12000 +class __task : public tbb::detail::d1::task +{ + protected: + tbb::detail::d1::small_object_allocator _M_allocator{}; + tbb::detail::d1::execution_data* _M_execute_data{}; + __task* _M_parent{}; + std::atomic<int> _M_refcount{}; + bool _M_recycle{}; + + template <typename _Fn> + __task* + allocate_func_task(_Fn&& __f) + { + assert(_M_execute_data != nullptr); + tbb::detail::d1::small_object_allocator __alloc{}; + auto __t = + __alloc.new_object<__func_task<typename std::decay<_Fn>::type>>(*_M_execute_data, std::forward<_Fn>(__f)); + __t->_M_allocator = __alloc; + return __t; + } + + public: + __task* + parent() + { + return _M_parent; + } + + void + set_ref_count(int __n) + { + _M_refcount.store(__n, std::memory_order_release); + } + + template <typename _Fn> + __task* + make_continuation(_Fn&& __f) + { + auto __t = allocate_func_task(std::forward<_Fn&&>(__f)); + __t->_M_parent = _M_parent; + _M_parent = nullptr; + return __t; + } + + template <typename _Fn> + __task* + make_child_of(__task* __parent, _Fn&& __f) + { + auto __t = 
allocate_func_task(std::forward<_Fn&&>(__f)); + __t->_M_parent = __parent; + return __t; + } + + template <typename _Fn> + __task* + make_additional_child_of(__task* __parent, _Fn&& __f) + { + auto __t = make_child_of(__parent, std::forward<_Fn>(__f)); + assert(__parent->_M_refcount.load(std::memory_order_relaxed) > 0); + ++__parent->_M_refcount; + return __t; + } + + inline void + recycle_as_continuation() + { + _M_recycle = true; + } + + inline void + recycle_as_child_of(__task* parent) + { + _M_recycle = true; + _M_parent = parent; + } + + inline void + spawn(__task* __t) + { + assert(_M_execute_data != nullptr); + tbb::detail::d1::spawn(*__t, *_M_execute_data->context); + } + + template <typename _Fn> + static inline void + spawn_root_and_wait(__root_task<_Fn>& __root) + { + tbb::detail::d1::execute_and_wait(*__root._M_func_task, __root._M_context, __root._M_wait_object, + __root._M_context); + } + + template <typename _Func> + friend class __func_task; +}; + +template <typename _Func> +class __func_task : public __task +{ + _Func _M_func; + + __task* + execute(tbb::detail::d1::execution_data& __ed) override + { + _M_execute_data = &__ed; + _M_recycle = false; + __task* __next = _M_func(this); + return finalize(__next); + }; + + __task* + cancel(tbb::detail::d1::execution_data& __ed) override + { + return finalize(nullptr); + } + + __task* + finalize(__task* __next) + { + bool __recycle = _M_recycle; + _M_recycle = false; + + if (__recycle) + { + return __next; + } + + auto __parent = _M_parent; + auto __alloc = _M_allocator; + auto __ed = _M_execute_data; + + this->~__func_task(); + + assert(__parent != nullptr); + assert(__parent->_M_refcount.load(std::memory_order_relaxed) > 0); + if (--__parent->_M_refcount == 0) + { + assert(__next == nullptr); + __alloc.deallocate(this, *__ed); + return __parent; + } + + return __next; + } + + friend class __root_task<_Func>; + + public: + template <typename _Fn> + __func_task(_Fn&& __f) : _M_func(std::forward<_Fn>(__f)) 
+ { + } + + _Func& + body() + { + return _M_func; + } +}; + +template <typename _Func> +class __root_task : public __task +{ + __task* + execute(tbb::detail::d1::execution_data& __ed) override + { + _M_wait_object.release(); + return nullptr; + }; + + __task* + cancel(tbb::detail::d1::execution_data& __ed) override + { + _M_wait_object.release(); + return nullptr; + } + + __func_task<_Func>* _M_func_task{}; + tbb::detail::d1::wait_context _M_wait_object{0}; + tbb::task_group_context _M_context{}; + + public: + template <typename... Args> + __root_task(Args&&... args) : _M_wait_object{1} + { + tbb::detail::d1::small_object_allocator __alloc{}; + _M_func_task = __alloc.new_object<__func_task<_Func>>(_Func(std::forward<Args>(args)...)); + _M_func_task->_M_allocator = __alloc; + _M_func_task->_M_parent = this; + _M_refcount.store(1, std::memory_order_relaxed); + } + + friend class __task; +}; +#endif // TBB_INTERFACE_VERSION <= 12000 + template <typename _RandomAccessIterator1, typename _RandomAccessIterator2, typename _Compare, typename _Cleanup, typename _LeafMerge> -class __merge_task : public tbb::task +class __merge_func { typedef typename std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType1; typedef typename std::iterator_traits<_RandomAccessIterator2>::difference_type _DifferenceType2; typedef typename std::common_type<_DifferenceType1, _DifferenceType2>::type _SizeType; typedef typename std::iterator_traits<_RandomAccessIterator1>::value_type _ValueType; - /*override*/ tbb::task* - execute(); _RandomAccessIterator1 _M_x_beg; _RandomAccessIterator2 _M_z_beg; @@ -529,7 +825,7 @@ class __merge_task : public tbb::task }; public: - __merge_task(_SizeType __xs, _SizeType __xe, _SizeType __ys, _SizeType __ye, _SizeType __zs, _Compare __comp, + __merge_func(_SizeType __xs, _SizeType __xe, _SizeType __ys, _SizeType __ye, _SizeType __zs, _Compare __comp, _Cleanup, _LeafMerge __leaf_merge, _SizeType __nsort, _RandomAccessIterator1 __x_beg, 
_RandomAccessIterator2 __z_beg, bool __x_orig, bool __y_orig, bool __root) : _M_xs(__xs), _M_xe(__xe), _M_ys(__ys), _M_ye(__ye), _M_zs(__zs), _M_x_beg(__x_beg), _M_z_beg(__z_beg), @@ -554,12 +850,14 @@ class __merge_task : public tbb::task _y_orig = __on_off; } + __task* + operator()(__task* __self); + private: - __merge_task* - parent_merge() const + __merge_func* + parent_merge(__task* __self) const { - tbb::task* p = (_root ? nullptr : parent()); - return static_cast<__merge_task*>(p); + return _root ? nullptr : &static_cast<__func_task<__merge_func>*>(__self->parent())->body(); } bool x_less_y() @@ -615,8 +913,8 @@ class __merge_task : public tbb::task _y_orig = !_y_orig; } - tbb::task* - merge_ranges() + __task* + merge_ranges(__task* __self) { assert(_x_orig == _y_orig); //two merged subrange must be lie into the same buffer @@ -626,7 +924,7 @@ class __merge_task : public tbb::task // need to merge {x} and {y} if (__n > __merge_cut_off) - return split_merging(); + return split_merging(__self); //merge to buffer if (_x_orig) @@ -634,7 +932,7 @@ class __merge_task : public tbb::task _M_leaf_merge(_M_x_beg + _M_xs, _M_x_beg + _M_xe, _M_x_beg + _M_ys, _M_x_beg + _M_ye, _M_z_beg + _M_zs, _M_comp, __move_value_construct(), __move_value_construct(), __move_range_construct(), __move_range_construct()); - assert(parent_merge()); //not root merging task + assert(parent_merge(__self)); //not root merging task } //merge to "origin" else @@ -656,13 +954,13 @@ class __merge_task : public tbb::task return nullptr; } - tbb::task* - process_ranges() + __task* + process_ranges(__task* __self) { assert(_x_orig == _y_orig); assert(!_split); - auto p = parent_merge(); + auto p = parent_merge(__self); if (!p) { //root merging task @@ -685,7 +983,7 @@ class __merge_task : public tbb::task move_y_range(); //parallel moving } // need to merge {x} and {y}. 
- return merge_ranges(); + return merge_ranges(__self); } //else: not root merging task (parent_merge() == NULL) //optimization, just for sort algorithm, //{x} <= {y} @@ -699,12 +997,12 @@ class __merge_task : public tbb::task const auto id_range = _M_zs; p->set_odd(id_range, !_x_orig); - return merge_ranges(); + return merge_ranges(__self); } //splitting as merge task into 2 of the same level - tbb::task* - split_merging() + __task* + split_merging(__task* __self) { assert(_x_orig == _y_orig); const auto __nx = (_M_xe - _M_xs); @@ -732,43 +1030,42 @@ class __merge_task : public tbb::task } auto __zm = _M_zs + ((__xm - _M_xs) + (__ym - _M_ys)); + __merge_func __right_func(__xm, _M_xe, __ym, _M_ye, __zm, _M_comp, _Cleanup(), _M_leaf_merge, _M_nsort, + _M_x_beg, _M_z_beg, _x_orig, _y_orig, _root); + __right_func._split = true; + auto __merge_task = __self->make_additional_child_of(__self->parent(), std::move(__right_func)); + __self->spawn(__merge_task); + __self->recycle_as_continuation(); - __merge_task* __right = new (tbb::task::allocate_additional_child_of(*parent())) - __merge_task(__xm, _M_xe, __ym, _M_ye, __zm, _M_comp, _Cleanup(), _M_leaf_merge, _M_nsort, _M_x_beg, - _M_z_beg, _x_orig, _y_orig, _root); - - __right->_split = true; - - tbb::task::spawn(*__right); - tbb::task::recycle_as_continuation(); _M_xe = __xm; _M_ye = __ym; _split = true; - return this; + return __self; } }; template <typename _RandomAccessIterator1, typename _RandomAccessIterator2, typename __M_Compare, typename _Cleanup, typename _LeafMerge> -tbb::task* -__merge_task<_RandomAccessIterator1, _RandomAccessIterator2, __M_Compare, _Cleanup, _LeafMerge>::execute() +__task* +__merge_func<_RandomAccessIterator1, _RandomAccessIterator2, __M_Compare, _Cleanup, _LeafMerge>:: +operator()(__task* __self) { //a. 
split merge task into 2 of the same level; the special logic, //without processing(process_ranges) adjacent sub-ranges x and y if (_split) - return merge_ranges(); + return merge_ranges(__self); //b. General merging of adjacent sub-ranges x and y (with optimization in case of {x} <= {y} ) //1. x and y are in the even buffer //2. x and y are in the odd buffer if (_x_orig == _y_orig) - return process_ranges(); + return process_ranges(__self); //3. x is in even buffer, y is in the odd buffer //4. x is in odd buffer, y is in the even buffer - if (!parent_merge()) + if (!parent_merge(__self)) { //root merge task if (_x_orig) move_x_range(); @@ -788,11 +1085,11 @@ __merge_task<_RandomAccessIterator1, _RandomAccessIterator2, __M_Compare, _Clean move_y_range(); } - return process_ranges(); + return process_ranges(__self); } template <typename _RandomAccessIterator1, typename _RandomAccessIterator2, typename _Compare, typename _LeafSort> -class __stable_sort_task : public tbb::task +class __stable_sort_func { public: typedef typename std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType1; @@ -800,8 +1097,6 @@ class __stable_sort_task : public tbb::task typedef typename std::common_type<_DifferenceType1, _DifferenceType2>::type _SizeType; private: - /*override*/ tbb::task* - execute(); _RandomAccessIterator1 _M_xs, _M_xe, _M_x_beg; _RandomAccessIterator2 _M_zs, _M_z_beg; _Compare _M_comp; @@ -810,22 +1105,25 @@ class __stable_sort_task : public tbb::task _SizeType _M_nsort; //zero or number of elements to be sorted for partial_sort alforithm public: - __stable_sort_task(_RandomAccessIterator1 __xs, _RandomAccessIterator1 __xe, _RandomAccessIterator2 __zs, + __stable_sort_func(_RandomAccessIterator1 __xs, _RandomAccessIterator1 __xe, _RandomAccessIterator2 __zs, bool __root, _Compare __comp, _LeafSort __leaf_sort, _SizeType __nsort, _RandomAccessIterator1 __x_beg, _RandomAccessIterator2 __z_beg) : _M_xs(__xs), _M_xe(__xe), _M_x_beg(__x_beg), 
_M_zs(__zs), _M_z_beg(__z_beg), _M_comp(__comp), _M_leaf_sort(__leaf_sort), _M_root(__root), _M_nsort(__nsort) { } + + __task* + operator()(__task* __self); }; #define _PSTL_STABLE_SORT_CUT_OFF 500 template <typename _RandomAccessIterator1, typename _RandomAccessIterator2, typename _Compare, typename _LeafSort> -tbb::task* -__stable_sort_task<_RandomAccessIterator1, _RandomAccessIterator2, _Compare, _LeafSort>::execute() +__task* +__stable_sort_func<_RandomAccessIterator1, _RandomAccessIterator2, _Compare, _LeafSort>::operator()(__task* __self) { - typedef __merge_task<_RandomAccessIterator1, _RandomAccessIterator2, _Compare, __utils::__serial_destroy, + typedef __merge_func<_RandomAccessIterator1, _RandomAccessIterator2, _Compare, __utils::__serial_destroy, __utils::__serial_move_merge> _MergeTaskType; @@ -835,34 +1133,27 @@ __stable_sort_task<_RandomAccessIterator1, _RandomAccessIterator2, _Compare, _Le if (__n <= __sort_cut_off) { _M_leaf_sort(_M_xs, _M_xe, _M_comp); - assert(!_M_root); - - tbb::task* p = parent(); - const auto id_range = _M_xs - _M_x_beg; - return nullptr; } const _RandomAccessIterator1 __xm = _M_xs + __n / 2; const _RandomAccessIterator2 __zm = _M_zs + (__xm - _M_xs); const _RandomAccessIterator2 __ze = _M_zs + __n; - _MergeTaskType* __m = new (allocate_continuation()) _MergeTaskType( - _M_xs - _M_x_beg, __xm - _M_x_beg, __xm - _M_x_beg, _M_xe - _M_x_beg, _M_zs - _M_z_beg, _M_comp, - __utils::__serial_destroy(), __utils::__serial_move_merge(__nmerge), _M_nsort, _M_x_beg, _M_z_beg, - /*x_orig*/ true, /*y_orig*/ true, /*root*/ _M_root); - + _MergeTaskType __m(_MergeTaskType(_M_xs - _M_x_beg, __xm - _M_x_beg, __xm - _M_x_beg, _M_xe - _M_x_beg, + _M_zs - _M_z_beg, _M_comp, __utils::__serial_destroy(), + __utils::__serial_move_merge(__nmerge), _M_nsort, _M_x_beg, _M_z_beg, + /*x_orig*/ true, /*y_orig*/ true, /*root*/ _M_root)); + auto __parent = __self->make_continuation(std::move(__m)); + __parent->set_ref_count(2); + auto __right = 
__self->make_child_of( + __parent, __stable_sort_func(__xm, _M_xe, __zm, false, _M_comp, _M_leaf_sort, _M_nsort, _M_x_beg, _M_z_beg)); + __self->spawn(__right); + __self->recycle_as_child_of(__parent); _M_root = false; - - __m->set_ref_count(2); - auto __right = new (__m->allocate_child()) - __stable_sort_task(__xm, _M_xe, __zm, _M_root, _M_comp, _M_leaf_sort, _M_nsort, _M_x_beg, _M_z_beg); - - spawn(*__right); - recycle_as_child_of(*__m); _M_xe = __xm; - return this; + return __self; } template <class _ExecutionPolicy, typename _RandomAccessIterator, typename _Compare, typename _LeafSort> @@ -882,11 +1173,9 @@ __parallel_stable_sort(_ExecutionPolicy&&, _RandomAccessIterator __xs, _RandomAc if (__n > __sort_cut_off) { __buffer<_ValueType> __buf(__n); - tbb::task* root = new (tbb::task::allocate_root()) - __stable_sort_task<_RandomAccessIterator, _ValueType*, _Compare, _LeafSort>( - __xs, __xe, __buf.get(), true, __comp, __leaf_sort, __nsort, __xs, __buf.get()); - tbb::task::spawn_root_and_wait(*root); - + __root_task<__stable_sort_func<_RandomAccessIterator, _ValueType*, _Compare, _LeafSort>> __root{ + __xs, __xe, __buf.get(), true, __comp, __leaf_sort, __nsort, __xs, __buf.get()}; + __task::spawn_root_and_wait(__root); return; } //serial sort @@ -899,10 +1188,8 @@ __parallel_stable_sort(_ExecutionPolicy&&, _RandomAccessIterator __xs, _RandomAc //------------------------------------------------------------------------ template <typename _RandomAccessIterator1, typename _RandomAccessIterator2, typename _RandomAccessIterator3, typename _Compare, typename _LeafMerge> -class __merge_task_static : public tbb::task +class __merge_func_static { - /*override*/ tbb::task* - execute(); _RandomAccessIterator1 _M_xs, _M_xe; _RandomAccessIterator2 _M_ys, _M_ye; _RandomAccessIterator3 _M_zs; @@ -910,20 +1197,23 @@ class __merge_task_static : public tbb::task _LeafMerge _M_leaf_merge; public: - __merge_task_static(_RandomAccessIterator1 __xs, _RandomAccessIterator1 __xe, 
_RandomAccessIterator2 __ys, + __merge_func_static(_RandomAccessIterator1 __xs, _RandomAccessIterator1 __xe, _RandomAccessIterator2 __ys, _RandomAccessIterator2 __ye, _RandomAccessIterator3 __zs, _Compare __comp, _LeafMerge __leaf_merge) : _M_xs(__xs), _M_xe(__xe), _M_ys(__ys), _M_ye(__ye), _M_zs(__zs), _M_comp(__comp), _M_leaf_merge(__leaf_merge) { } + + __task* + operator()(__task* __self); }; //TODO: consider usage of parallel_for with a custom blocked_range template <typename _RandomAccessIterator1, typename _RandomAccessIterator2, typename _RandomAccessIterator3, typename __M_Compare, typename _LeafMerge> -tbb::task* -__merge_task_static<_RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator3, __M_Compare, - _LeafMerge>::execute() +__task* +__merge_func_static<_RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator3, __M_Compare, _LeafMerge>:: +operator()(__task* __self) { typedef typename std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType1; typedef typename std::iterator_traits<_RandomAccessIterator2>::difference_type _DifferenceType2; @@ -949,14 +1239,14 @@ __merge_task_static<_RandomAccessIterator1, _RandomAccessIterator2, _RandomAcces __ym = std::lower_bound(_M_ys, _M_ye, *__xm, _M_comp); } const _RandomAccessIterator3 __zm = _M_zs + ((__xm - _M_xs) + (__ym - _M_ys)); - tbb::task* __right = new (tbb::task::allocate_additional_child_of(*parent())) - __merge_task_static(__xm, _M_xe, __ym, _M_ye, __zm, _M_comp, _M_leaf_merge); - tbb::task::spawn(*__right); - tbb::task::recycle_as_continuation(); + auto __right = __self->make_additional_child_of( + __self->parent(), __merge_func_static(__xm, _M_xe, __ym, _M_ye, __zm, _M_comp, _M_leaf_merge)); + __self->spawn(__right); + __self->recycle_as_continuation(); _M_xe = __xm; _M_ye = __ym; - return this; + return __self; } template <class _ExecutionPolicy, typename _RandomAccessIterator1, typename _RandomAccessIterator2, @@ -979,11 +1269,11 @@ 
__parallel_merge(_ExecutionPolicy&&, _RandomAccessIterator1 __xs, _RandomAccessI else { tbb::this_task_arena::isolate([=]() { - typedef __merge_task_static<_RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator3, + typedef __merge_func_static<_RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator3, _Compare, _LeafMerge> _TaskType; - tbb::task::spawn_root_and_wait(*new (tbb::task::allocate_root()) - _TaskType(__xs, __xe, __ys, __ye, __zs, __comp, __leaf_merge)); + __root_task<_TaskType> __root{__xs, __xe, __ys, __ye, __zs, __comp, __leaf_merge}; + __task::spawn_root_and_wait(__root); }); } } |