Loading...
Searching...
No Matches
partitioner.hpp
1// reference:
2// - gomp: https://github.com/gcc-mirror/gcc/blob/master/libgomp/iter.c
3// - komp: https://github.com/llvm-mirror/openmp/blob/master/runtime/src/kmp_dispatch.cpp
4
5#pragma once
6
12namespace tf {
13
19enum class PartitionerType : int {
21 STATIC,
24};
25
26
27//template <typename C>
28//class PartitionInvoker : public PartitionerBase {
29//
30// protected
31//
32// C _closure;
33//
34// template <typename... ArgsT>
35// auto operator()(ArgsT&&... args) {
36// return std::invoke(closure, std::forward<ArgsT>(args)...);
37// }
38//
39// template <typename... ArgsT>
40// auto operator()(ArgsT&&... args) const {
41// return std::invoke(closure, std::forward<ArgsT>(args)...);
42// }
43//
44//};
45
52
53
54
55// ----------------------------------------------------------------------------
56// Partitioner Base
57// ----------------------------------------------------------------------------
58
124template <typename C = DefaultClosureWrapper>
126
127 public:
128
  /// indicates whether the closure wrapper type @c C is the default (empty) wrapper
  constexpr static bool is_default_wrapper_v = std::is_same_v<C, DefaultClosureWrapper>;
133
138
  /// default constructor; the chunk size defaults to 0, which each derived
  /// partitioner maps to its own preferred size
  PartitionerBase() = default;

  /// constructs a partitioner with the given chunk size
  explicit PartitionerBase(size_t chunk_size) : _chunk_size {chunk_size} {}
148
153 _chunk_size {chunk_size},
154 _closure_wrapper {std::forward<C>(closure_wrapper)} {
155 }
156
  /// queries the chunk size of this partitioner
  size_t chunk_size() const { return _chunk_size; }

  /// updates the chunk size of this partitioner
  void chunk_size(size_t cz) { _chunk_size = cz; }

  /// acquires an immutable access to the closure wrapper object
  const C& closure_wrapper() const { return _closure_wrapper; }

  /// acquires a mutable access to the closure wrapper object
  C& closure_wrapper() { return _closure_wrapper; }

  /// assigns a new closure wrapper object
  template <typename F>
  void closure_wrapper(F&& fn) { _closure_wrapper = std::forward<F>(fn); }
182
  /// wraps the given callable with the configured closure wrapper
  ///
  /// With the default (empty) wrapper the callable is forwarded unchanged.
  /// Otherwise a lambda is returned that routes the callable through the
  /// stored wrapper; since the wrapper is stateful, the lambda captures
  /// @c this so all invocations share the same wrapper instance.
  template <typename F>
  TF_FORCE_INLINE decltype(auto) operator () (F&& callable) {
    if constexpr(is_default_wrapper_v) {
      return std::forward<F>(callable);
    }
    else {
      // closure wrapper is stateful - capture it by reference
      return [this, c=std::forward<F>(callable)]() mutable { _closure_wrapper(c); };
    }
  }
196
197 protected:
198
202 size_t _chunk_size{0};
203
207 C _closure_wrapper;
208};
209
210// ----------------------------------------------------------------------------
211// Static Partitioner
212// ----------------------------------------------------------------------------
213
261template <typename C = DefaultClosureWrapper>
263
264 public:
265
  /// queries the partition type (static)
  static constexpr PartitionerType type() { return PartitionerType::STATIC; }

  /// default constructor
  StaticPartitioner() = default;

  /// constructs a static partitioner with the given chunk size
  explicit StaticPartitioner(size_t sz) : PartitionerBase<C>(sz) {}

  /// constructs a static partitioner with the given chunk size and closure wrapper
  explicit StaticPartitioner(size_t sz, C&& closure) :
    PartitionerBase<C>(sz, std::forward<C>(closure)) {
  }
287
295 size_t adjusted_chunk_size(size_t N, size_t W, size_t w) const {
296 return this->_chunk_size ? this->_chunk_size : N/W + (w < N%W);
297 }
298
299 // --------------------------------------------------------------------------
300 // scheduling methods
301 // --------------------------------------------------------------------------
302
306 template <typename F>
307 void loop(
308 size_t N, size_t W, size_t curr_b, size_t chunk_size, F&& func
309 ) {
310 size_t stride = W * chunk_size;
311 while(curr_b < N) {
312 size_t curr_e = (std::min)(curr_b + chunk_size, N);
313 if constexpr (std::is_same_v<std::invoke_result_t<F, size_t, size_t>, bool>) {
314 if(func(curr_b, curr_e)) {
315 return;
316 }
317 } else {
318 func(curr_b, curr_e);
319 }
320 curr_b += stride;
321 }
322 }
323
  /// applies the given function over a multi-dimensional index range using
  /// static chunking
  ///
  /// Linear offsets in [curr_b, N) are mapped onto @c range: each chunk of
  /// @c chunk_size offsets is carved into boxes via @c slice_floor, and the
  /// worker then strides ahead by W * chunk_size. If @c func returns bool,
  /// a true return value stops the loop early.
  /// NOTE(review): slice_floor is assumed to return a sub-range plus the
  /// number of linear indices it actually covered (possibly fewer than
  /// requested) — semantics defined by R, not visible here.
  template <IndexRangeMDLike R, typename F>
  void loop(const R& range, size_t N, size_t W, size_t curr_b, size_t chunk_size, F&& func) const {
    size_t stride = W * chunk_size;
    while(curr_b < N) {
      size_t curr_e = (std::min)(curr_b + chunk_size, N);
      // cover the whole chunk, one sliced box at a time
      while(curr_b < curr_e) {
        auto [box, consumed] = range.slice_floor(curr_b, curr_e - curr_b);
        if constexpr (std::is_same_v<std::invoke_result_t<F, R>, bool>) {
          // a true return value requests early termination
          if(func(box)) {
            return;
          }
        } else {
          func(box);
        }
        curr_b += consumed;
      }
      // advance to this worker's next chunk; the inner loop already moved
      // curr_b past the current chunk, so subtract it from the stride
      curr_b += stride - chunk_size;
    }
  }
355
356};
357
358// ----------------------------------------------------------------------------
359// Guided Partitioner
360// ----------------------------------------------------------------------------
361
401template <typename C = DefaultClosureWrapper>
403
404 public:
405
  /// queries the partition type (dynamic)
  static constexpr PartitionerType type() { return PartitionerType::DYNAMIC; }

  /// default constructor
  GuidedPartitioner() = default;

  /// constructs a guided partitioner with the given chunk size
  explicit GuidedPartitioner(size_t sz) : PartitionerBase<C> (sz) {}

  /// constructs a guided partitioner with the given chunk size and closure wrapper
  explicit GuidedPartitioner(size_t sz, C&& closure) :
    PartitionerBase<C>(sz, std::forward<C>(closure)) {
  }
428
429 // --------------------------------------------------------------------------
430 // scheduling methods
431 // --------------------------------------------------------------------------
432
436 template <typename F>
437 void loop(
438 size_t N, size_t W, std::atomic<size_t>& next, F&& func
439 ) const {
440
441 size_t chunk_size = (this->_chunk_size == 0) ? size_t{1} : this->_chunk_size;
442
443 size_t p1 = 2 * W * (chunk_size + 1);
444 float p2 = 0.5f / static_cast<float>(W);
445 size_t curr_b = next.load(std::memory_order_relaxed);
446
447 while(curr_b < N) {
448
449 size_t r = N - curr_b;
450
451 // fine-grained
452 if(r < p1) {
453 while(1) {
454 curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed);
455 if(curr_b >= N) {
456 return;
457 }
458 if constexpr (std::is_same_v<std::invoke_result_t<F, size_t, size_t>, bool>) {
459 if(func(curr_b, (std::min)(curr_b + chunk_size, N))) {
460 return;
461 }
462 } else {
463 func(curr_b, (std::min)(curr_b + chunk_size, N));
464 }
465 }
466 break;
467 }
468 // coarse-grained
469 else {
470 size_t q = static_cast<size_t>(p2 * r);
471 if(q < chunk_size) {
472 q = chunk_size;
473 }
474 size_t curr_e = (std::min)(curr_b + q, N);
475 if(next.compare_exchange_strong(curr_b, curr_e, std::memory_order_relaxed,
476 std::memory_order_relaxed)) {
477 if constexpr (std::is_same_v<std::invoke_result_t<F, size_t, size_t>, bool>) {
478 if(func(curr_b, curr_e)) {
479 return;
480 }
481 } else {
482 func(curr_b, curr_e);
483 }
484 curr_b = curr_e;
485 }
486 }
487 }
488 }
489
  /// applies the given function over a multi-dimensional index range using
  /// guided scheduling
  ///
  /// NOTE(review): slice_ceil is assumed to return a sub-range plus the
  /// number of linear indices it covered — semantics defined by R.
  template <IndexRangeMDLike R, typename F>
  void loop(const R& range, size_t N, size_t W, std::atomic<size_t>& next, F&& func) const {

    // a configured chunk size of 0 means one iteration per chunk
    size_t chunk_size = (this->_chunk_size == 0) ? size_t{1} : this->_chunk_size;
    // below p1 remaining iterations, fall back to fixed-size chunks
    size_t p1 = 2 * W * (chunk_size + 1);
    float p2 = 0.5f / static_cast<float>(W);
    size_t curr_b = next.load(std::memory_order_relaxed);

    while(curr_b < N) {
      size_t r = N - curr_b;
      // request either a fixed chunk (fine-grained) or a 1/(2W) fraction
      // of the remaining iterations (coarse-grained)
      auto [box, consumed] = range.slice_ceil(curr_b,
        (r < p1) ? chunk_size : (std::max)(static_cast<size_t>(p2 * r), chunk_size)
      );
      // claim [curr_b, curr_b+consumed); on failure, curr_b is reloaded
      // with the current cursor and the chunk is re-sliced next iteration
      if(next.compare_exchange_weak(curr_b, curr_b + consumed,
                                    std::memory_order_relaxed,
                                    std::memory_order_relaxed)) {
        if constexpr (std::is_same_v<std::invoke_result_t<F, R>, bool>) {
          // a true return value requests early termination
          if(func(box)) {
            return;
          }
        } else {
          func(box);
        }
        curr_b += consumed;
      }
    }
  }
520
521};
522
523// ----------------------------------------------------------------------------
524// Dynamic Partitioner
525// ----------------------------------------------------------------------------
526
566template <typename C = DefaultClosureWrapper>
568
569 public:
570
  /// queries the partition type (dynamic)
  static constexpr PartitionerType type() { return PartitionerType::DYNAMIC; }
575
580
  /// constructs a dynamic partitioner with the given chunk size
  explicit DynamicPartitioner(size_t sz) : PartitionerBase<C>(sz) {}

  /// constructs a dynamic partitioner with the given chunk size and closure wrapper
  explicit DynamicPartitioner(size_t sz, C&& closure) :
    PartitionerBase<C>(sz, std::forward<C>(closure)) {
  }
592
593 // --------------------------------------------------------------------------
594 // scheduling methods
595 // --------------------------------------------------------------------------
596
600 template <typename F>
601 void loop(
602 size_t N, size_t, std::atomic<size_t>& next, F&& func
603 ) const {
604
605 size_t chunk_size = (this->_chunk_size == 0) ? size_t{1} : this->_chunk_size;
606 size_t curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed);
607
608 while(curr_b < N) {
609 if constexpr (std::is_same_v<std::invoke_result_t<F, size_t, size_t>, bool>) {
610 if(func(curr_b, (std::min)(curr_b + chunk_size, N))) {
611 return;
612 }
613 } else {
614 func(curr_b, (std::min)(curr_b + chunk_size, N));
615 }
616 curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed);
617 }
618 }
619
  /// applies the given function over a multi-dimensional index range using
  /// dynamic scheduling
  ///
  /// NOTE(review): slice_ceil is assumed to return a sub-range plus the
  /// number of linear indices it covered — semantics defined by R.
  template <IndexRangeMDLike R, typename F>
  void loop(const R& range, size_t N, size_t, std::atomic<size_t>& next, F&& func) const {
    size_t curr_b = next.load(std::memory_order_relaxed);
    // a configured chunk size of 0 means one iteration per chunk
    size_t chunk_size = (this->_chunk_size == 0) ? size_t{1} : this->_chunk_size;

    while(curr_b < N) {
      auto [box, consumed] = range.slice_ceil(curr_b, chunk_size);
      // claim [curr_b, curr_b+consumed); on failure, curr_b is reloaded
      // with the current cursor and the chunk is re-sliced next iteration
      if(next.compare_exchange_weak(curr_b, curr_b + consumed,
                                    std::memory_order_relaxed,
                                    std::memory_order_relaxed)) {
        if constexpr (std::is_same_v<std::invoke_result_t<F, R>, bool>) {
          // a true return value requests early termination
          if(func(box)) {
            return;
          }
        } else {
          func(box);
        }
        curr_b += consumed;
      }
    }
  }
644
645};
646
647// ----------------------------------------------------------------------------
648// RandomPartitioner
649// ----------------------------------------------------------------------------
650
690template <typename C = DefaultClosureWrapper>
692
693 public:
694
  /// queries the partition type (dynamic)
  static constexpr PartitionerType type() { return PartitionerType::DYNAMIC; }

  /// default constructor
  RandomPartitioner() = default;

  /// constructs a random partitioner with the given chunk size
  explicit RandomPartitioner(size_t sz) : PartitionerBase<C>(sz) {}

  /// constructs a random partitioner with the given chunk size and closure wrapper
  explicit RandomPartitioner(size_t sz, C&& closure) :
    PartitionerBase<C>(sz, std::forward<C>(closure)) {
  }

  /// constructs a random partitioner with the given alpha and beta parameters
  RandomPartitioner(float alpha, float beta) : _alpha{alpha}, _beta{beta} {}
721
725 RandomPartitioner(float alpha, float beta, C&& closure) :
726 _alpha {alpha}, _beta {beta},
727 PartitionerBase<C>(0, std::forward<C>(closure)) {
728 }
729
  /// queries the alpha parameter
  float alpha() const { return _alpha; }

  /// queries the beta parameter
  float beta() const { return _beta; }
739
746 std::pair<size_t, size_t> chunk_size_range(size_t N, size_t W) const {
747
748 size_t b1 = static_cast<size_t>(_alpha * N * W);
749 size_t b2 = static_cast<size_t>(_beta * N * W);
750
751 if(b1 > b2) {
752 std::swap(b1, b2);
753 }
754
755 b1 = (std::max)(b1, size_t{1});
756 b2 = (std::max)(b2, b1 + 1);
757
758 return {b1, b2};
759 }
760
761 // --------------------------------------------------------------------------
762 // scheduling methods
763 // --------------------------------------------------------------------------
764
  /// applies the given function over [0, N) using randomized chunk sizes
  ///
  /// Each chunk size is drawn uniformly from [b1, b2] as computed by
  /// chunk_size_range(N, W); chunks are claimed with fetch_add on the
  /// shared cursor. Seeding from std::random_device makes the schedule
  /// non-deterministic across runs. If @c func returns bool, a true
  /// return value stops the loop early.
  template <typename F>
  void loop(
    size_t N, size_t W, std::atomic<size_t>& next, F&& func
  ) const {

    auto [b1, b2] = chunk_size_range(N, W);

    std::default_random_engine engine {std::random_device{}()};
    std::uniform_int_distribution<size_t> dist(b1, b2);

    size_t chunk_size = dist(engine);
    size_t curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed);

    while(curr_b < N) {
      if constexpr (std::is_same_v<std::invoke_result_t<F, size_t, size_t>, bool>) {
        // a true return value requests early termination
        if(func(curr_b, (std::min)(curr_b + chunk_size, N))) {
          return;
        }
      } else {
        func(curr_b, (std::min)(curr_b + chunk_size, N));
      }
      // draw a fresh chunk size before claiming the next block
      chunk_size = dist(engine);
      curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed);
    }
  }
793
  /// applies the given function over a multi-dimensional index range using
  /// randomized chunk sizes
  ///
  /// NOTE(review): slice_ceil is assumed to return a sub-range plus the
  /// number of linear indices it covered — semantics defined by R.
  template <IndexRangeMDLike R, typename F>
  void loop(const R& range, size_t N, size_t W, std::atomic<size_t>& next, F&& func) const {

    auto [b1, b2] = chunk_size_range(N, W);

    // random_device seeding makes the schedule non-deterministic across runs
    std::default_random_engine engine{std::random_device{}()};
    std::uniform_int_distribution<size_t> dist(b1, b2);

    size_t curr_b = next.load(std::memory_order_relaxed);

    while(curr_b < N) {
      // slice a randomly-sized chunk out of the range
      auto [box, consumed] = range.slice_ceil(curr_b, dist(engine));
      // claim [curr_b, curr_b+consumed); on failure, curr_b is reloaded
      // with the current cursor and the chunk is re-sliced next iteration
      if(next.compare_exchange_weak(curr_b, curr_b + consumed,
                                    std::memory_order_relaxed,
                                    std::memory_order_relaxed)) {
        if constexpr (std::is_same_v<std::invoke_result_t<F, R>, bool>) {
          // a true return value requests early termination
          if(func(box)) {
            return;
          }
        } else {
          func(box);
        }
        curr_b += consumed;
      }
    }
  }
823
824 private:
825
826 float _alpha {0.01f};
827 float _beta {0.50f};
828};
829
837
/// concept (C++20) satisfied by any type derived from PartitionerBase
/// instantiated with the type's own closure_wrapper_type
template <typename P>
concept PartitionerLike = std::derived_from<P, PartitionerBase<typename P::closure_wrapper_type>>;

/// variable template that determines if a type is a partitioner
template <typename P>
inline constexpr bool is_partitioner_v = PartitionerLike<P>;
855
856} // end of namespace tf -----------------------------------------------------
class to create a default closure wrapper
Definition partitioner.hpp:51
class to create a dynamic partitioner for scheduling parallel algorithms
Definition partitioner.hpp:567
DynamicPartitioner()=default
default constructor
DynamicPartitioner(size_t sz, C &&closure)
construct a dynamic partitioner with the given chunk size and the closure
Definition partitioner.hpp:589
static constexpr PartitionerType type()
queries the partition type (dynamic)
Definition partitioner.hpp:574
DynamicPartitioner(size_t sz)
construct a dynamic partitioner with the given chunk size
Definition partitioner.hpp:584
class to create a guided partitioner for scheduling parallel algorithms
Definition partitioner.hpp:402
GuidedPartitioner(size_t sz, C &&closure)
construct a guided partitioner with the given chunk size and the closure
Definition partitioner.hpp:425
GuidedPartitioner(size_t sz)
construct a guided partitioner with the given chunk size
Definition partitioner.hpp:420
GuidedPartitioner()=default
default constructor
static constexpr PartitionerType type()
queries the partition type (dynamic)
Definition partitioner.hpp:409
class to derive a partitioner for scheduling parallel algorithms
Definition partitioner.hpp:125
PartitionerBase(size_t chunk_size)
construct a partitioner with the given chunk size
Definition partitioner.hpp:147
static constexpr bool is_default_wrapper_v
indicating if the given closure wrapper is a default wrapper (i.e., empty)
Definition partitioner.hpp:132
C closure_wrapper_type
the closure type
Definition partitioner.hpp:137
void chunk_size(size_t cz)
update the chunk size of this partitioner
Definition partitioner.hpp:165
const C & closure_wrapper() const
acquire an immutable access to the closure wrapper object
Definition partitioner.hpp:170
void closure_wrapper(F &&fn)
modify the closure wrapper object
Definition partitioner.hpp:181
PartitionerBase(size_t chunk_size, C &&closure_wrapper)
construct a partitioner with the given chunk size and closure wrapper
Definition partitioner.hpp:152
C & closure_wrapper()
acquire a mutable access to the closure wrapper object
Definition partitioner.hpp:175
PartitionerBase()=default
default constructor
size_t chunk_size() const
query the chunk size of this partitioner
Definition partitioner.hpp:160
class to construct a random partitioner for scheduling parallel algorithms
Definition partitioner.hpp:691
RandomPartitioner(size_t sz, C &&closure)
construct a random partitioner with the given chunk size and the closure
Definition partitioner.hpp:713
RandomPartitioner(float alpha, float beta, C &&closure)
constructs a random partitioner with the given parameters and the closure
Definition partitioner.hpp:725
std::pair< size_t, size_t > chunk_size_range(size_t N, size_t W) const
queries the range of chunk size
Definition partitioner.hpp:746
RandomPartitioner(float alpha, float beta)
constructs a random partitioner with the given parameters
Definition partitioner.hpp:720
RandomPartitioner()=default
default constructor
static constexpr PartitionerType type()
queries the partition type (dynamic)
Definition partitioner.hpp:698
float alpha() const
queries the alpha value
Definition partitioner.hpp:733
RandomPartitioner(size_t sz)
construct a dynamic partitioner with the given chunk size
Definition partitioner.hpp:708
float beta() const
queries the beta value
Definition partitioner.hpp:738
class to construct a static partitioner for scheduling parallel algorithms
Definition partitioner.hpp:262
StaticPartitioner()=default
default constructor
size_t adjusted_chunk_size(size_t N, size_t W, size_t w) const
queries the adjusted chunk size
Definition partitioner.hpp:295
StaticPartitioner(size_t sz)
construct a static partitioner with the given chunk size
Definition partitioner.hpp:279
StaticPartitioner(size_t sz, C &&closure)
construct a static partitioner with the given chunk size and the closure
Definition partitioner.hpp:284
static constexpr PartitionerType type()
queries the partition type (static)
Definition partitioner.hpp:269
determines if a type is a partitioner
Definition partitioner.hpp:844
taskflow namespace
Definition small_vector.hpp:20
@ STATIC
static partitioner type
PartitionerType
enumeration of all partitioner types
Definition partitioner.hpp:19
@ DYNAMIC
dynamic partitioner type
@ STATIC
static partitioner type
constexpr bool is_partitioner_v
determines if a type is a partitioner (variable template)
Definition partitioner.hpp:854