Loading...
Searching...
No Matches
partitioner.hpp
1// reference:
2// - gomp: https://github.com/gcc-mirror/gcc/blob/master/libgomp/iter.c
3// - komp: https://github.com/llvm-mirror/openmp/blob/master/runtime/src/kmp_dispatch.cpp
4
5#pragma once
6
11
12namespace tf {
13
19enum class PartitionerType : int {
24};
25
26
27//template <typename C>
28//class PartitionInvoker : public PartitionerBase {
29//
30// protected
31//
32// C _closure;
33//
34// template <typename... ArgsT>
35// auto operator()(ArgsT&&... args) {
36// return std::invoke(closure, std::forward<ArgsT>(args)...);
37// }
38//
39// template <typename... ArgsT>
40// auto operator()(ArgsT&&... args) const {
41// return std::invoke(closure, std::forward<ArgsT>(args)...);
42// }
43//
44//};
45
52
53
54
55// ----------------------------------------------------------------------------
56// Partitioner Base
57// ----------------------------------------------------------------------------
58
124template <typename C = DefaultClosureWrapper>
126
127 public:
128
132 constexpr static bool is_default_wrapper_v = std::is_same_v<C, DefaultClosureWrapper>;
133
138
142 PartitionerBase() = default;
143
147 explicit PartitionerBase(size_t chunk_size) : _chunk_size {chunk_size} {}
148
153 _chunk_size {chunk_size},
154 _closure_wrapper {std::forward<C>(closure_wrapper)} {
155 }
156
  /// queries the chunk size of this partitioner
  size_t chunk_size() const { return _chunk_size; }

  /// updates the chunk size of this partitioner
  void chunk_size(size_t cz) { _chunk_size = cz; }

  /// acquires an immutable access to the closure wrapper object
  const C& closure_wrapper() const { return _closure_wrapper; }

  /// acquires a mutable access to the closure wrapper object
  C& closure_wrapper() { return _closure_wrapper; }

  /// modifies the closure wrapper object
  template <typename F>
  void closure_wrapper(F&& fn) { _closure_wrapper = std::forward<F>(fn); }
182
  /// wraps the given callable for execution
  ///
  /// When the closure wrapper is the default (empty) wrapper, the callable is
  /// forwarded unchanged; otherwise a mutable lambda is returned that runs the
  /// callable through the stateful closure wrapper.
  template <typename F>
  TF_FORCE_INLINE decltype(auto) operator () (F&& callable) {
    if constexpr(is_default_wrapper_v) {
      return std::forward<F>(callable);
    }
    else {
      // closure wrapper is stateful - capture it by reference (via this)
      return [this, c=std::forward<F>(callable)]() mutable { _closure_wrapper(c); };
    }
  }
196
  protected:

  // chunk size of this partitioner; 0 means each scheduling method picks its
  // own default (e.g., even split for static, 1 for dynamic/guided)
  size_t _chunk_size{0};

  // closure wrapper object applied to callables passed through operator()
  C _closure_wrapper;
208};
209
210// ----------------------------------------------------------------------------
211// Static Partitioner
212// ----------------------------------------------------------------------------
213
261template <typename C = DefaultClosureWrapper>
263
264 public:
265
  /// queries the partition type (static)
  static constexpr PartitionerType type() { return PartitionerType::STATIC; }

  /// default constructor
  StaticPartitioner() = default;

  /// constructs a static partitioner with the given chunk size
  explicit StaticPartitioner(size_t sz) : PartitionerBase<C>(sz) {}

  /// constructs a static partitioner with the given chunk size and closure wrapper
  explicit StaticPartitioner(size_t sz, C&& closure) :
    PartitionerBase<C>(sz, std::forward<C>(closure)) {
  }
287
295 size_t adjusted_chunk_size(size_t N, size_t W, size_t w) const {
296 return this->_chunk_size ? this->_chunk_size : N/W + (w < N%W);
297 }
298
299 // --------------------------------------------------------------------------
300 // scheduling methods
301 // --------------------------------------------------------------------------
302
306 template <typename F>
307 void loop(
308 size_t N, size_t W, size_t curr_b, size_t chunk_size, F&& func
309 ) {
310 size_t stride = W * chunk_size;
311 while(curr_b < N) {
312 size_t curr_e = (std::min)(curr_b + chunk_size, N);
313 func(curr_b, curr_e);
314 curr_b += stride;
315 }
316 }
317
321 template <typename F>
322 void loop_until(
323 size_t N, size_t W, size_t curr_b, size_t chunk_size, F&& func
324 ) {
325 size_t stride = W * chunk_size;
326 while(curr_b < N) {
327 size_t curr_e = (std::min)(curr_b + chunk_size, N);
328 if(func(curr_b, curr_e)) {
329 return;
330 }
331 curr_b += stride;
332 }
333 }
334
  /**
  @brief applies static scheduling over a multi-dimensional index range

  @param range multi-dimensional range providing consume_chunk(beg, size)
  @param N total number of flattened iterations (presumably range.volume() -
           confirm with caller, per the commented-out line below)
  @param W number of workers
  @param next shared atomic counter of the next unclaimed flattened index
  @param func callable invoked with each consumed sub-box

  With no configured chunk size, the range splits evenly into ceil(N/W)-sized
  chunks, one per worker.
  */
  template <IndexRangeMDLike R, typename F>
  void loop(R& range, size_t N, size_t W, std::atomic<size_t>& next, F&& func) const {
    //size_t N = range.volume();
    size_t curr_b = next.load(std::memory_order_relaxed);
    size_t chunk_size = (this->_chunk_size == 0) ? (N+W-1) / W : this->_chunk_size;

    while (curr_b < N) {
      // calculates the actual valid sub-box and exact consumed amount
      auto [box, consumed] = range.consume_chunk(curr_b, chunk_size);
      // publish the claim; on CAS failure curr_b is reloaded with the current
      // counter value and the chunk is recomputed on the next iteration
      if (next.compare_exchange_weak(curr_b, curr_b + consumed,
                                     std::memory_order_relaxed,
                                     std::memory_order_relaxed)) {
        func(box);
        curr_b += consumed;
      }
    }
  }
355
356};
357
358// ----------------------------------------------------------------------------
359// Guided Partitioner
360// ----------------------------------------------------------------------------
361
401template <typename C = DefaultClosureWrapper>
403
404 public:
405
  /// queries the partition type (guided partitioning reports PartitionerType::DYNAMIC)
  static constexpr PartitionerType type() { return PartitionerType::DYNAMIC; }

  /// default constructor
  GuidedPartitioner() = default;

  /// constructs a guided partitioner with the given chunk size
  explicit GuidedPartitioner(size_t sz) : PartitionerBase<C> (sz) {}

  /// constructs a guided partitioner with the given chunk size and closure wrapper
  explicit GuidedPartitioner(size_t sz, C&& closure) :
    PartitionerBase<C>(sz, std::forward<C>(closure)) {
  }
428
429 // --------------------------------------------------------------------------
430 // scheduling methods
431 // --------------------------------------------------------------------------
432
436 template <typename F>
437 void loop(
438 size_t N, size_t W, std::atomic<size_t>& next, F&& func
439 ) const {
440
441 size_t chunk_size = (this->_chunk_size == 0) ? size_t{1} : this->_chunk_size;
442
443 size_t p1 = 2 * W * (chunk_size + 1);
444 float p2 = 0.5f / static_cast<float>(W);
445 size_t curr_b = next.load(std::memory_order_relaxed);
446
447 while(curr_b < N) {
448
449 size_t r = N - curr_b;
450
451 // fine-grained
452 if(r < p1) {
453 while(1) {
454 curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed);
455 if(curr_b >= N) {
456 return;
457 }
458 func(curr_b, (std::min)(curr_b + chunk_size, N));
459 }
460 break;
461 }
462 // coarse-grained
463 else {
464 size_t q = static_cast<size_t>(p2 * r);
465 if(q < chunk_size) {
466 q = chunk_size;
467 }
468 //size_t curr_e = (q <= r) ? curr_b + q : N;
469 size_t curr_e = (std::min)(curr_b + q, N);
470 if(next.compare_exchange_strong(curr_b, curr_e, std::memory_order_relaxed,
471 std::memory_order_relaxed)) {
472 func(curr_b, curr_e);
473 //curr_b = next.load(std::memory_order_relaxed);
474 curr_b = curr_e;
475 }
476 }
477 }
478 }
479
483 template <typename F>
484 void loop_until(
485 size_t N, size_t W, std::atomic<size_t>& next, F&& func
486 ) const {
487
488 size_t chunk_size = (this->_chunk_size == 0) ? size_t{1} : this->_chunk_size;
489
490 size_t p1 = 2 * W * (chunk_size + 1);
491 float p2 = 0.5f / static_cast<float>(W);
492 size_t curr_b = next.load(std::memory_order_relaxed);
493
494 while(curr_b < N) {
495
496 size_t r = N - curr_b;
497
498 // fine-grained
499 if(r < p1) {
500 while(1) {
501 curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed);
502 if(curr_b >= N) {
503 return;
504 }
505 if(func(curr_b, (std::min)(curr_b + chunk_size, N))) {
506 return;
507 }
508 }
509 break;
510 }
511 // coarse-grained
512 else {
513 size_t q = static_cast<size_t>(p2 * r);
514 if(q < chunk_size) {
515 q = chunk_size;
516 }
517 //size_t curr_e = (q <= r) ? curr_b + q : N;
518 size_t curr_e = (std::min)(curr_b + q, N);
519 if(next.compare_exchange_strong(curr_b, curr_e, std::memory_order_relaxed,
520 std::memory_order_relaxed)) {
521 if(func(curr_b, curr_e)) {
522 return;
523 }
524 //curr_b = next.load(std::memory_order_relaxed);
525 curr_b = curr_e;
526 }
527 }
528 }
529 }
530
  /**
  @brief applies guided scheduling over a multi-dimensional index range

  @param range multi-dimensional range providing consume_chunk(beg, size)
  @param N total number of flattened iterations
  @param W number of workers
  @param next shared atomic counter of the next unclaimed flattened index
  @param func callable invoked with each consumed sub-box
  */
  template <IndexRangeMDLike R, typename F>
  void loop(R& range, size_t N, size_t W, std::atomic<size_t>& next, F&& func) const {

    size_t chunk_size = (this->_chunk_size == 0) ? size_t{1} : this->_chunk_size;
    size_t requested_chunk_size;
    // threshold below which fixed-size chunks are requested
    size_t p1 = 2 * W * (chunk_size + 1);
    float p2 = 0.5f / static_cast<float>(W);
    size_t curr_b = next.load(std::memory_order_relaxed);

    while(curr_b < N) {

      size_t r = N - curr_b;

      // fine-grained
      if(r < p1) {
        requested_chunk_size = chunk_size;
      }
      // coarse-grained: proportional to the remaining work, at least chunk_size
      else {
        requested_chunk_size = (std::max)(static_cast<size_t>(p2*r), chunk_size);
      }

      auto [box, consumed] = range.consume_chunk(curr_b, requested_chunk_size);

      // publish the claim; on CAS failure curr_b is reloaded and we retry
      if (next.compare_exchange_weak(curr_b, curr_b + consumed,
                                     std::memory_order_relaxed,
                                     std::memory_order_relaxed)) {
        func(box);
        curr_b += consumed;
      }
    }
  }
566
567};
568
569// ----------------------------------------------------------------------------
570// Dynamic Partitioner
571// ----------------------------------------------------------------------------
572
612template <typename C = DefaultClosureWrapper>
614
615 public:
616
620 static constexpr PartitionerType type() { return PartitionerType::DYNAMIC; }
621
626
  /// constructs a dynamic partitioner with the given chunk size
  explicit DynamicPartitioner(size_t sz) : PartitionerBase<C>(sz) {}

  /// constructs a dynamic partitioner with the given chunk size and closure wrapper
  explicit DynamicPartitioner(size_t sz, C&& closure) :
    PartitionerBase<C>(sz, std::forward<C>(closure)) {
  }
638
639 // --------------------------------------------------------------------------
640 // scheduling methods
641 // --------------------------------------------------------------------------
642
646 template <typename F>
647 void loop(
648 size_t N, size_t, std::atomic<size_t>& next, F&& func
649 ) const {
650
651 size_t chunk_size = (this->_chunk_size == 0) ? size_t{1} : this->_chunk_size;
652 size_t curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed);
653
654 while(curr_b < N) {
655 func(curr_b, (std::min)(curr_b + chunk_size, N));
656 curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed);
657 }
658 }
659
663 template <typename F>
664 void loop_until(
665 size_t N, size_t, std::atomic<size_t>& next, F&& func
666 ) const {
667
668 size_t chunk_size = (this->_chunk_size == 0) ? size_t{1} : this->_chunk_size;
669 size_t curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed);
670
671 while(curr_b < N) {
672 if(func(curr_b, (std::min)(curr_b + chunk_size, N))) {
673 return;
674 }
675 curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed);
676 }
677 }
678
  /**
  @brief applies dynamic scheduling over a multi-dimensional index range

  @param range multi-dimensional range providing consume_chunk(beg, size)
  @param N total number of flattened iterations (presumably range.volume() -
           confirm with caller, per the commented-out line below)
  @param next shared atomic counter of the next unclaimed flattened index
  @param func callable invoked with each consumed sub-box
  */
  template <IndexRangeMDLike R, typename F>
  void loop(R& range, size_t N, size_t, std::atomic<size_t>& next, F&& func) const {
    //size_t N = range.volume();
    size_t curr_b = next.load(std::memory_order_relaxed);
    size_t chunk_size = (this->_chunk_size == 0) ? size_t{1} : this->_chunk_size;

    while (curr_b < N) {
      // calculates the actual valid sub-box and exact consumed amount
      auto [box, consumed] = range.consume_chunk(curr_b, chunk_size);
      // publish the claim; on CAS failure curr_b is reloaded and we retry
      if (next.compare_exchange_weak(curr_b, curr_b + consumed,
                                     std::memory_order_relaxed,
                                     std::memory_order_relaxed)) {
        func(box);
        curr_b += consumed;
      }
    }
  }
699
700};
701
702// ----------------------------------------------------------------------------
703// RandomPartitioner
704// ----------------------------------------------------------------------------
705
745template <typename C = DefaultClosureWrapper>
747
748 public:
749
  /// queries the partition type (random partitioning reports PartitionerType::DYNAMIC)
  static constexpr PartitionerType type() { return PartitionerType::DYNAMIC; }

  /// default constructor
  RandomPartitioner() = default;

  /// constructs a random partitioner with the given chunk size
  explicit RandomPartitioner(size_t sz) : PartitionerBase<C>(sz) {}

  /// constructs a random partitioner with the given chunk size and closure wrapper
  explicit RandomPartitioner(size_t sz, C&& closure) :
    PartitionerBase<C>(sz, std::forward<C>(closure)) {
  }

  /// constructs a random partitioner with the given alpha/beta scaling factors
  RandomPartitioner(float alpha, float beta) : _alpha{alpha}, _beta{beta} {}
776
780 RandomPartitioner(float alpha, float beta, C&& closure) :
781 _alpha {alpha}, _beta {beta},
782 PartitionerBase<C>(0, std::forward<C>(closure)) {
783 }
784
788 float alpha() const { return _alpha; }
789
793 float beta() const { return _beta; }
794
801 std::pair<size_t, size_t> chunk_size_range(size_t N, size_t W) const {
802
803 size_t b1 = static_cast<size_t>(_alpha * N * W);
804 size_t b2 = static_cast<size_t>(_beta * N * W);
805
806 if(b1 > b2) {
807 std::swap(b1, b2);
808 }
809
810 b1 = (std::max)(b1, size_t{1});
811 b2 = (std::max)(b2, b1 + 1);
812
813 return {b1, b2};
814 }
815
816 // --------------------------------------------------------------------------
817 // scheduling methods
818 // --------------------------------------------------------------------------
819
  /**
  @brief applies random scheduling to run the given function over [0, N)

  @param N total number of iterations
  @param W number of workers
  @param next shared atomic counter of the next unclaimed iteration
  @param func callable invoked with each claimed sub-range [beg, end)

  Each claimed chunk has a size sampled uniformly from chunk_size_range(N, W).
  */
  template <typename F>
  void loop(
    size_t N, size_t W, std::atomic<size_t>& next, F&& func
  ) const {

    auto [b1, b2] = chunk_size_range(N, W);

    // per-call engine seeded from random_device; a fresh chunk size is drawn
    // for every claim
    std::default_random_engine engine {std::random_device{}()};
    std::uniform_int_distribution<size_t> dist(b1, b2);

    size_t chunk_size = dist(engine);
    size_t curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed);

    while(curr_b < N) {
      func(curr_b, (std::min)(curr_b + chunk_size, N));
      chunk_size = dist(engine);
      curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed);
    }
  }
842
  /**
  @brief applies random scheduling with early termination over [0, N)

  @param N total number of iterations
  @param W number of workers
  @param next shared atomic counter of the next unclaimed iteration
  @param func callable invoked with each claimed sub-range [beg, end);
              returning true stops the scheduling immediately

  Each claimed chunk has a size sampled uniformly from chunk_size_range(N, W).
  */
  template <typename F>
  void loop_until(
    size_t N, size_t W, std::atomic<size_t>& next, F&& func
  ) const {

    auto [b1, b2] = chunk_size_range(N, W);

    // per-call engine seeded from random_device; a fresh chunk size is drawn
    // for every claim
    std::default_random_engine engine {std::random_device{}()};
    std::uniform_int_distribution<size_t> dist(b1, b2);

    size_t chunk_size = dist(engine);
    size_t curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed);

    while(curr_b < N) {
      if(func(curr_b, (std::min)(curr_b + chunk_size, N))){
        return;
      }
      chunk_size = dist(engine);
      curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed);
    }
  }
867
  /**
  @brief applies random scheduling over a multi-dimensional index range

  @param range multi-dimensional range providing consume_chunk(beg, size)
  @param N total number of flattened iterations
  @param W number of workers
  @param next shared atomic counter of the next unclaimed flattened index
  @param func callable invoked with each consumed sub-box
  */
  template <IndexRangeMDLike R, typename F>
  void loop(R& range, size_t N, size_t W, std::atomic<size_t>& next, F&& func) const {

    auto [b1, b2] = chunk_size_range(N, W);

    std::default_random_engine engine{std::random_device{}()};
    std::uniform_int_distribution<size_t> dist(b1, b2);

    size_t curr_b = next.load(std::memory_order_relaxed);

    while (curr_b < N) {
      // request a freshly sampled chunk size for every claim attempt
      auto [box, consumed] = range.consume_chunk(curr_b, dist(engine));
      if (next.compare_exchange_weak(curr_b, curr_b + consumed,
                                     std::memory_order_relaxed,
                                     std::memory_order_relaxed)) {
        func(box);
        curr_b += consumed;
      }
      // CAS failure reloads curr_b; resample chunk on next iteration naturally
    }
  }
892
  private:

  // scaling factors applied to N*W in chunk_size_range:
  // alpha drives the lower bound, beta the upper bound of the sampled chunk size
  float _alpha {0.01f};
  float _beta {0.50f};
897};
898
906
/**
@brief concept satisfied by types deriving from PartitionerBase instantiated
       with their declared closure_wrapper_type
*/
template <typename P>
concept Partitioner = std::derived_from<P, PartitionerBase<typename P::closure_wrapper_type>>;

/**
@brief determines if a type is a partitioner (variable-template form of tf::Partitioner)
*/
template <typename P>
inline constexpr bool is_partitioner_v = Partitioner<P>;
924
925} // end of namespace tf -----------------------------------------------------
class to create a default closure wrapper
Definition partitioner.hpp:51
DynamicPartitioner()=default
default constructor
DynamicPartitioner(size_t sz, C &&closure)
construct a dynamic partitioner with the given chunk size and the closure
Definition partitioner.hpp:635
static constexpr PartitionerType type()
queries the partition type (dynamic)
Definition partitioner.hpp:620
DynamicPartitioner(size_t sz)
construct a dynamic partitioner with the given chunk size
Definition partitioner.hpp:630
class to create a guided partitioner for scheduling parallel algorithms
Definition partitioner.hpp:402
GuidedPartitioner(size_t sz, C &&closure)
construct a guided partitioner with the given chunk size and the closure
Definition partitioner.hpp:425
GuidedPartitioner(size_t sz)
construct a guided partitioner with the given chunk size
Definition partitioner.hpp:420
GuidedPartitioner()=default
default constructor
static constexpr PartitionerType type()
queries the partition type (dynamic)
Definition partitioner.hpp:409
PartitionerBase(size_t chunk_size)
construct a partitioner with the given chunk size
Definition partitioner.hpp:147
static constexpr bool is_default_wrapper_v
indicating if the given closure wrapper is a default wrapper (i.e., empty)
Definition partitioner.hpp:132
C closure_wrapper_type
the closure type
Definition partitioner.hpp:137
void chunk_size(size_t cz)
update the chunk size of this partitioner
Definition partitioner.hpp:165
const C & closure_wrapper() const
acquire an immutable access to the closure wrapper object
Definition partitioner.hpp:170
void closure_wrapper(F &&fn)
modify the closure wrapper object
Definition partitioner.hpp:181
PartitionerBase(size_t chunk_size, C &&closure_wrapper)
construct a partitioner with the given chunk size and closure wrapper
Definition partitioner.hpp:152
C & closure_wrapper()
acquire a mutable access to the closure wrapper object
Definition partitioner.hpp:175
PartitionerBase()=default
default constructor
size_t chunk_size() const
query the chunk size of this partitioner
Definition partitioner.hpp:160
RandomPartitioner(size_t sz, C &&closure)
construct a random partitioner with the given chunk size and the closure
Definition partitioner.hpp:768
RandomPartitioner(float alpha, float beta, C &&closure)
constructs a random partitioner with the given parameters and the closure
Definition partitioner.hpp:780
std::pair< size_t, size_t > chunk_size_range(size_t N, size_t W) const
queries the range of chunk size
Definition partitioner.hpp:801
RandomPartitioner(float alpha, float beta)
constructs a random partitioner with the given parameters
Definition partitioner.hpp:775
RandomPartitioner()=default
default constructor
static constexpr PartitionerType type()
queries the partition type (dynamic)
Definition partitioner.hpp:753
float alpha() const
queries the alpha value
Definition partitioner.hpp:788
RandomPartitioner(size_t sz)
construct a random partitioner with the given chunk size
Definition partitioner.hpp:763
float beta() const
queries the beta value
Definition partitioner.hpp:793
StaticPartitioner()=default
default constructor
size_t adjusted_chunk_size(size_t N, size_t W, size_t w) const
queries the adjusted chunk size
Definition partitioner.hpp:295
StaticPartitioner(size_t sz)
construct a static partitioner with the given chunk size
Definition partitioner.hpp:279
StaticPartitioner(size_t sz, C &&closure)
construct a static partitioner with the given chunk size and the closure
Definition partitioner.hpp:284
static constexpr PartitionerType type()
queries the partition type (static)
Definition partitioner.hpp:269
determines if a type is a partitioner
Definition partitioner.hpp:913
taskflow namespace
Definition small_vector.hpp:20
@ STATIC
static task type
Definition task.hpp:25
PartitionerType
enumeration of all partitioner types
Definition partitioner.hpp:19
@ DYNAMIC
dynamic partitioner type
Definition partitioner.hpp:23
@ STATIC
static partitioner type
Definition partitioner.hpp:21
constexpr bool is_partitioner_v
determines if a type is a partitioner (variable template)
Definition partitioner.hpp:923
GuidedPartitioner<> DefaultPartitioner
default partitioner set to tf::GuidedPartitioner
Definition partitioner.hpp:905