Loading...
Searching...
No Matches
partitioner.hpp
1// reference:
2// - gomp: https://github.com/gcc-mirror/gcc/blob/master/libgomp/iter.c
3// - komp: https://github.com/llvm-mirror/openmp/blob/master/runtime/src/kmp_dispatch.cpp
4
5#pragma once
6
11
12namespace tf {
13
19enum class PartitionerType : int {
24};
25
26
27//template <typename C>
28//class PartitionInvoker : public PartitionerBase {
29//
30// protected
31//
32// C _closure;
33//
34// template <typename... ArgsT>
35// auto operator()(ArgsT&&... args) {
36// return std::invoke(closure, std::forward<ArgsT>(args)...);
37// }
38//
39// template <typename... ArgsT>
40// auto operator()(ArgsT&&... args) const {
41// return std::invoke(closure, std::forward<ArgsT>(args)...);
42// }
43//
44//};
45
52
53
54
55// ----------------------------------------------------------------------------
56// Partitioner Base
57// ----------------------------------------------------------------------------
58
124template <typename C = DefaultClosureWrapper>
126
127 public:
128
// compile-time flag: true when C is the default (empty) closure wrapper,
// i.e., no user-supplied wrapper is installed
constexpr static bool is_default_wrapper_v = std::is_same_v<C, DefaultClosureWrapper>;

// default constructor (chunk size 0; scheduling methods interpret 0 as
// "pick a default", e.g., guided/dynamic partitioners map it to 1)
PartitionerBase() = default;

// queries the chunk size of this partitioner
size_t chunk_size() const { return _chunk_size; }

// updates the chunk size of this partitioner
void chunk_size(size_t cz) { _chunk_size = cz; }

// acquires an immutable access to the closure wrapper object
const C& closure_wrapper() const { return _closure_wrapper; }

// replaces the closure wrapper object with the given callable
template <typename F>
void closure_wrapper(F&& fn) { _closure_wrapper = std::forward<F>(fn); }
182
// wraps the given callable for execution:
// - with the default (empty) wrapper the callable is forwarded unchanged
// - otherwise returns a lambda that routes the callable through the
//   user-provided closure wrapper; the partitioner is captured via `this`
//   because the wrapper may be stateful
template <typename F>
TF_FORCE_INLINE decltype(auto) operator () (F&& callable) {
  if constexpr(is_default_wrapper_v) {
    return std::forward<F>(callable);
  }
  else {
    // closure wrapper is stateful - capture it by reference
    return [this, c=std::forward<F>(callable)]() mutable { _closure_wrapper(c); };
  }
}
196
197 protected:
198
202 size_t _chunk_size{0};
203
208};
209
210// ----------------------------------------------------------------------------
211// Guided Partitioner
212// ----------------------------------------------------------------------------
213
253template <typename C = DefaultClosureWrapper>
255
256 public:
257
// queries the partition type: guided partitioning is a dynamic scheme
static constexpr PartitionerType type() { return PartitionerType::DYNAMIC; }

// default constructor (chunk size 0, i.e., use the scheduler default of 1)
GuidedPartitioner() = default;

// constructs a guided partitioner with the given (minimum) chunk size
explicit GuidedPartitioner(size_t sz) : PartitionerBase<C> (sz) {}

// constructs a guided partitioner with the given chunk size and closure wrapper
explicit GuidedPartitioner(size_t sz, C&& closure) :
  PartitionerBase<C>(sz, std::forward<C>(closure)) {
}
280
281 // --------------------------------------------------------------------------
282 // scheduling methods
283 // --------------------------------------------------------------------------
284
288 template <typename F>
289 requires std::invocable<F, size_t, size_t>
290 void loop(
291 size_t N, size_t W, std::atomic<size_t>& next, F&& func
292 ) const {
293
294 size_t chunk_size = (this->_chunk_size == 0) ? size_t{1} : this->_chunk_size;
295
296 size_t p1 = 2 * W * (chunk_size + 1);
297 float p2 = 0.5f / static_cast<float>(W);
298 size_t curr_b = next.load(std::memory_order_relaxed);
299
300 while(curr_b < N) {
301
302 size_t r = N - curr_b;
303
304 // fine-grained
305 if(r < p1) {
306 while(1) {
307 curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed);
308 if(curr_b >= N) {
309 return;
310 }
311 func(curr_b, (std::min)(curr_b + chunk_size, N));
312 }
313 break;
314 }
315 // coarse-grained
316 else {
317 size_t q = static_cast<size_t>(p2 * r);
318 if(q < chunk_size) {
319 q = chunk_size;
320 }
321 //size_t curr_e = (q <= r) ? curr_b + q : N;
322 size_t curr_e = (std::min)(curr_b + q, N);
323 if(next.compare_exchange_strong(curr_b, curr_e, std::memory_order_relaxed,
324 std::memory_order_relaxed)) {
325 func(curr_b, curr_e);
326 curr_b = next.load(std::memory_order_relaxed);
327 }
328 }
329 }
330 }
331
335 template <typename F>
336 requires (std::invocable<F, size_t, size_t> && std::same_as<std::invoke_result_t<F, size_t, size_t>, bool>)
337 void loop_until(
338 size_t N, size_t W, std::atomic<size_t>& next, F&& func
339 ) const {
340
341 size_t chunk_size = (this->_chunk_size == 0) ? size_t{1} : this->_chunk_size;
342
343 size_t p1 = 2 * W * (chunk_size + 1);
344 float p2 = 0.5f / static_cast<float>(W);
345 size_t curr_b = next.load(std::memory_order_relaxed);
346
347 while(curr_b < N) {
348
349 size_t r = N - curr_b;
350
351 // fine-grained
352 if(r < p1) {
353 while(1) {
354 curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed);
355 if(curr_b >= N) {
356 return;
357 }
358 if(func(curr_b, (std::min)(curr_b + chunk_size, N))) {
359 return;
360 }
361 }
362 break;
363 }
364 // coarse-grained
365 else {
366 size_t q = static_cast<size_t>(p2 * r);
367 if(q < chunk_size) {
368 q = chunk_size;
369 }
370 //size_t curr_e = (q <= r) ? curr_b + q : N;
371 size_t curr_e = (std::min)(curr_b + q, N);
372 if(next.compare_exchange_strong(curr_b, curr_e, std::memory_order_relaxed,
373 std::memory_order_relaxed)) {
374 if(func(curr_b, curr_e)) {
375 return;
376 }
377 curr_b = next.load(std::memory_order_relaxed);
378 }
379 }
380 }
381 }
382
383};
384
385// ----------------------------------------------------------------------------
386// Dynamic Partitioner
387// ----------------------------------------------------------------------------
388
428template <typename C = DefaultClosureWrapper>
430
431 public:
432
// queries the partition type (dynamic)
static constexpr PartitionerType type() { return PartitionerType::DYNAMIC; }

// constructs a dynamic partitioner with the given chunk size
explicit DynamicPartitioner(size_t sz) : PartitionerBase<C>(sz) {}

// constructs a dynamic partitioner with the given chunk size and closure wrapper
explicit DynamicPartitioner(size_t sz, C&& closure) :
  PartitionerBase<C>(sz, std::forward<C>(closure)) {
}
454
455 // --------------------------------------------------------------------------
456 // scheduling methods
457 // --------------------------------------------------------------------------
458
462 template <typename F>
463 requires std::invocable<F, size_t, size_t>
464 void loop(
465 size_t N, size_t, std::atomic<size_t>& next, F&& func
466 ) const {
467
468 size_t chunk_size = (this->_chunk_size == 0) ? size_t{1} : this->_chunk_size;
469 size_t curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed);
470
471 while(curr_b < N) {
472 func(curr_b, (std::min)(curr_b + chunk_size, N));
473 curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed);
474 }
475 }
476
480 template <typename F>
481 requires (std::invocable<F, size_t, size_t> && std::same_as<std::invoke_result_t<F, size_t, size_t>, bool>)
482 void loop_until(
483 size_t N, size_t, std::atomic<size_t>& next, F&& func
484 ) const {
485
486 size_t chunk_size = (this->_chunk_size == 0) ? size_t{1} : this->_chunk_size;
487 size_t curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed);
488
489 while(curr_b < N) {
490 if(func(curr_b, (std::min)(curr_b + chunk_size, N))) {
491 return;
492 }
493 curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed);
494 }
495 }
496
497};
498
499// ----------------------------------------------------------------------------
500// Static Partitioner
501// ----------------------------------------------------------------------------
502
550template <typename C = DefaultClosureWrapper>
552
553 public:
554
// queries the partition type (static)
static constexpr PartitionerType type() { return PartitionerType::STATIC; }

// default constructor (chunk size 0 - adjusted per worker at schedule time)
StaticPartitioner() = default;

// constructs a static partitioner with the given chunk size
explicit StaticPartitioner(size_t sz) : PartitionerBase<C>(sz) {}

// constructs a static partitioner with the given chunk size and closure wrapper
explicit StaticPartitioner(size_t sz, C&& closure) :
  PartitionerBase<C>(sz, std::forward<C>(closure)) {
}

// queries the adjusted chunk size for worker w out of W workers over N
// iterations: the configured chunk size if nonzero; otherwise N is divided
// as evenly as possible, giving one extra iteration to workers w < N % W
size_t adjusted_chunk_size(size_t N, size_t W, size_t w) const {
  return this->_chunk_size ? this->_chunk_size : N/W + (w < N%W);
}
587
588 // --------------------------------------------------------------------------
589 // scheduling methods
590 // --------------------------------------------------------------------------
591
595 template <typename F>
596 requires std::invocable<F, size_t, size_t>
597 void loop(
598 size_t N, size_t W, size_t curr_b, size_t chunk_size, F&& func
599 ) {
600 size_t stride = W * chunk_size;
601 while(curr_b < N) {
602 size_t curr_e = (std::min)(curr_b + chunk_size, N);
603 func(curr_b, curr_e);
604 curr_b += stride;
605 }
606 }
607
611 template <typename F>
612 requires (std::invocable<F, size_t, size_t> && std::same_as<std::invoke_result_t<F, size_t, size_t>, bool>)
613 void loop_until(
614 size_t N, size_t W, size_t curr_b, size_t chunk_size, F&& func
615 ) {
616 size_t stride = W * chunk_size;
617 while(curr_b < N) {
618 size_t curr_e = (std::min)(curr_b + chunk_size, N);
619 if(func(curr_b, curr_e)) {
620 return;
621 }
622 curr_b += stride;
623 }
624 }
625};
626
627// ----------------------------------------------------------------------------
628// RandomPartitioner
629// ----------------------------------------------------------------------------
630
670template <typename C = DefaultClosureWrapper>
672
673 public:
674
678 static constexpr PartitionerType type() { return PartitionerType::DYNAMIC; }
679
683 RandomPartitioner() = default;
684
688 explicit RandomPartitioner(size_t sz) : PartitionerBase<C>(sz) {}
689
693 explicit RandomPartitioner(size_t sz, C&& closure) :
694 PartitionerBase<C>(sz, std::forward<C>(closure)) {
695 }
696
700 RandomPartitioner(float alpha, float beta) : _alpha{alpha}, _beta{beta} {}
701
705 RandomPartitioner(float alpha, float beta, C&& closure) :
706 _alpha {alpha}, _beta {beta},
707 PartitionerBase<C>(0, std::forward<C>(closure)) {
708 }
709
713 float alpha() const { return _alpha; }
714
718 float beta() const { return _beta; }
719
726 std::pair<size_t, size_t> chunk_size_range(size_t N, size_t W) const {
727
728 size_t b1 = static_cast<size_t>(_alpha * N * W);
729 size_t b2 = static_cast<size_t>(_beta * N * W);
730
731 if(b1 > b2) {
732 std::swap(b1, b2);
733 }
734
735 b1 = (std::max)(b1, size_t{1});
736 b2 = (std::max)(b2, b1 + 1);
737
738 return {b1, b2};
739 }
740
741 // --------------------------------------------------------------------------
742 // scheduling methods
743 // --------------------------------------------------------------------------
744
// Applies func over [0, N) in randomly sized chunks claimed from the
// shared atomic cursor `next`.
//
// @param N     total number of iterations
// @param W     number of workers (scales the chunk-size range)
// @param next  shared atomic cursor marking the next unclaimed iteration
// @param func  callable invoked as func(begin, end) on each claimed chunk
template <typename F>
requires std::invocable<F, size_t, size_t>
void loop(
  size_t N, size_t W, std::atomic<size_t>& next, F&& func
) const {

  auto [b1, b2] = chunk_size_range(N, W);

  // per-call engine seeded from the system entropy source; each chunk
  // size is drawn uniformly from [b1, b2]
  std::default_random_engine engine {std::random_device{}()};
  std::uniform_int_distribution<size_t> dist(b1, b2);

  size_t chunk_size = dist(engine);
  size_t curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed);

  while(curr_b < N) {
    func(curr_b, (std::min)(curr_b + chunk_size, N));
    // draw a fresh chunk size for the next grab
    chunk_size = dist(engine);
    curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed);
  }
}
768
// Applies func over [0, N) in randomly sized chunks, stopping early as
// soon as func returns true.
//
// @param N     total number of iterations
// @param W     number of workers (scales the chunk-size range)
// @param next  shared atomic cursor marking the next unclaimed iteration
// @param func  callable invoked as func(begin, end); returning true stops
//              this worker's loop
template <typename F>
requires (std::invocable<F, size_t, size_t> && std::same_as<std::invoke_result_t<F, size_t, size_t>, bool>)
void loop_until(
  size_t N, size_t W, std::atomic<size_t>& next, F&& func
) const {

  auto [b1, b2] = chunk_size_range(N, W);

  // per-call engine seeded from the system entropy source; each chunk
  // size is drawn uniformly from [b1, b2]
  std::default_random_engine engine {std::random_device{}()};
  std::uniform_int_distribution<size_t> dist(b1, b2);

  size_t chunk_size = dist(engine);
  size_t curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed);

  while(curr_b < N) {
    if(func(curr_b, (std::min)(curr_b + chunk_size, N))){
      return;
    }
    // draw a fresh chunk size for the next grab
    chunk_size = dist(engine);
    curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed);
  }
}
794
795 private:
796
797 float _alpha {0.01f};
798 float _beta {0.50f};
799};
800
808
// concept: P models a partitioner when it derives from PartitionerBase
// instantiated with P's own closure-wrapper type
template <typename P>
concept Partitioner = std::derived_from<P, PartitionerBase<typename P::closure_wrapper_type>>;

// variable-template form of the Partitioner concept
template <typename P>
inline constexpr bool is_partitioner_v = Partitioner<P>;
826
827} // end of namespace tf -----------------------------------------------------
class to create a default closure wrapper
Definition partitioner.hpp:51
DynamicPartitioner()=default
default constructor
DynamicPartitioner(size_t sz, C &&closure)
construct a dynamic partitioner with the given chunk size and the closure
Definition partitioner.hpp:451
static constexpr PartitionerType type()
queries the partition type (dynamic)
Definition partitioner.hpp:436
DynamicPartitioner(size_t sz)
construct a dynamic partitioner with the given chunk size
Definition partitioner.hpp:446
class to create a guided partitioner for scheduling parallel algorithms
Definition partitioner.hpp:254
GuidedPartitioner(size_t sz, C &&closure)
construct a guided partitioner with the given chunk size and the closure
Definition partitioner.hpp:277
GuidedPartitioner(size_t sz)
construct a guided partitioner with the given chunk size
Definition partitioner.hpp:272
GuidedPartitioner()=default
default constructor
static constexpr PartitionerType type()
queries the partition type (dynamic)
Definition partitioner.hpp:261
PartitionerBase(size_t chunk_size)
construct a partitioner with the given chunk size
Definition partitioner.hpp:147
static constexpr bool is_default_wrapper_v
indicating if the given closure wrapper is a default wrapper (i.e., empty)
Definition partitioner.hpp:132
C closure_wrapper_type
the closure type
Definition partitioner.hpp:137
void chunk_size(size_t cz)
update the chunk size of this partitioner
Definition partitioner.hpp:165
const C & closure_wrapper() const
acquire an immutable access to the closure wrapper object
Definition partitioner.hpp:170
void closure_wrapper(F &&fn)
modify the closure wrapper object
Definition partitioner.hpp:181
C _closure_wrapper
closure wrapper
Definition partitioner.hpp:207
PartitionerBase(size_t chunk_size, C &&closure_wrapper)
construct a partitioner with the given chunk size and closure wrapper
Definition partitioner.hpp:152
size_t _chunk_size
chunk size
Definition partitioner.hpp:202
C & closure_wrapper()
acquire a mutable access to the closure wrapper object
Definition partitioner.hpp:175
PartitionerBase()=default
default constructor
size_t chunk_size() const
query the chunk size of this partitioner
Definition partitioner.hpp:160
RandomPartitioner(size_t sz, C &&closure)
construct a random partitioner with the given chunk size and the closure
Definition partitioner.hpp:693
RandomPartitioner(float alpha, float beta, C &&closure)
constructs a random partitioner with the given parameters and the closure
Definition partitioner.hpp:705
std::pair< size_t, size_t > chunk_size_range(size_t N, size_t W) const
queries the range of chunk size
Definition partitioner.hpp:726
RandomPartitioner(float alpha, float beta)
constructs a random partitioner with the given parameters
Definition partitioner.hpp:700
RandomPartitioner()=default
default constructor
static constexpr PartitionerType type()
queries the partition type (dynamic)
Definition partitioner.hpp:678
float alpha() const
queries the alpha value
Definition partitioner.hpp:713
RandomPartitioner(size_t sz)
construct a random partitioner with the given chunk size
Definition partitioner.hpp:688
float beta() const
queries the beta value
Definition partitioner.hpp:718
StaticPartitioner()=default
default constructor
size_t adjusted_chunk_size(size_t N, size_t W, size_t w) const
queries the adjusted chunk size
Definition partitioner.hpp:584
StaticPartitioner(size_t sz)
construct a static partitioner with the given chunk size
Definition partitioner.hpp:568
StaticPartitioner(size_t sz, C &&closure)
construct a static partitioner with the given chunk size and the closure
Definition partitioner.hpp:573
static constexpr PartitionerType type()
queries the partition type (static)
Definition partitioner.hpp:558
determines if a type is a partitioner
Definition partitioner.hpp:815
taskflow namespace
Definition small_vector.hpp:20
@ STATIC
static task type
Definition task.hpp:25
PartitionerType
enumeration of all partitioner types
Definition partitioner.hpp:19
@ DYNAMIC
dynamic partitioner type
Definition partitioner.hpp:23
@ STATIC
static partitioner type
Definition partitioner.hpp:21
constexpr bool is_partitioner_v
determines if a type is a partitioner (variable template)
Definition partitioner.hpp:825
GuidedPartitioner<> DefaultPartitioner
default partitioner set to tf::GuidedPartitioner
Definition partitioner.hpp:807