124template <
typename C = DefaultClosureWrapper>
180 template <
typename F>
186 template <
typename F>
187 TF_FORCE_INLINE
decltype(
auto)
operator () (F&& callable) {
189 return std::forward<F>(callable);
193 return [
this, c=std::forward<F>(callable)]()
mutable { _closure_wrapper(c); };
202 size_t _chunk_size{0};
261template <
typename C = DefaultClosureWrapper>
296 return this->_chunk_size ? this->_chunk_size : N/W + (w < N%W);
306 template <
typename F>
308 size_t N,
size_t W,
size_t curr_b,
size_t chunk_size, F&& func
312 size_t curr_e = (std::min)(curr_b +
chunk_size, N);
313 if constexpr (std::is_same_v<std::invoke_result_t<F, size_t, size_t>,
bool>) {
314 if(func(curr_b, curr_e)) {
318 func(curr_b, curr_e);
336 template <IndexRangeMDLike R,
typename F>
337 void loop(
const R& range,
size_t N,
size_t W,
size_t curr_b,
size_t chunk_size, F&& func)
const {
340 size_t curr_e = (std::min)(curr_b +
chunk_size, N);
341 while(curr_b < curr_e) {
342 auto [box, consumed] = range.slice_floor(curr_b, curr_e - curr_b);
343 if constexpr (std::is_same_v<std::invoke_result_t<F, R>,
bool>) {
401template <
typename C = DefaultClosureWrapper>
436 template <
typename F>
438 size_t N,
size_t W, std::atomic<size_t>& next, F&& func
441 size_t chunk_size = (this->_chunk_size == 0) ?
size_t{1} : this->_chunk_size;
444 float p2 = 0.5f /
static_cast<float>(W);
445 size_t curr_b = next.load(std::memory_order_relaxed);
449 size_t r = N - curr_b;
454 curr_b = next.fetch_add(
chunk_size, std::memory_order_relaxed);
458 if constexpr (std::is_same_v<std::invoke_result_t<F, size_t, size_t>,
bool>) {
459 if(func(curr_b, (std::min)(curr_b +
chunk_size, N))) {
463 func(curr_b, (std::min)(curr_b +
chunk_size, N));
470 size_t q =
static_cast<size_t>(p2 * r);
474 size_t curr_e = (std::min)(curr_b + q, N);
475 if(next.compare_exchange_strong(curr_b, curr_e, std::memory_order_relaxed,
476 std::memory_order_relaxed)) {
477 if constexpr (std::is_same_v<std::invoke_result_t<F, size_t, size_t>,
bool>) {
478 if(func(curr_b, curr_e)) {
482 func(curr_b, curr_e);
493 template <IndexRangeMDLike R,
typename F>
494 void loop(
const R& range,
size_t N,
size_t W, std::atomic<size_t>& next, F&& func)
const {
496 size_t chunk_size = (this->_chunk_size == 0) ?
size_t{1} : this->_chunk_size;
498 float p2 = 0.5f /
static_cast<float>(W);
499 size_t curr_b = next.load(std::memory_order_relaxed);
502 size_t r = N - curr_b;
503 auto [box, consumed] = range.slice_ceil(curr_b,
506 if(next.compare_exchange_weak(curr_b, curr_b + consumed,
507 std::memory_order_relaxed,
508 std::memory_order_relaxed)) {
509 if constexpr (std::is_same_v<std::invoke_result_t<F, R>,
bool>) {
566template <
typename C = DefaultClosureWrapper>
600 template <
typename F>
602 size_t N,
size_t, std::atomic<size_t>& next, F&& func
605 size_t chunk_size = (this->_chunk_size == 0) ?
size_t{1} : this->_chunk_size;
606 size_t curr_b = next.fetch_add(
chunk_size, std::memory_order_relaxed);
609 if constexpr (std::is_same_v<std::invoke_result_t<F, size_t, size_t>,
bool>) {
610 if(func(curr_b, (std::min)(curr_b +
chunk_size, N))) {
614 func(curr_b, (std::min)(curr_b +
chunk_size, N));
616 curr_b = next.fetch_add(
chunk_size, std::memory_order_relaxed);
623 template <IndexRangeMDLike R,
typename F>
624 void loop(
const R& range,
size_t N,
size_t, std::atomic<size_t>& next, F&& func)
const {
625 size_t curr_b = next.load(std::memory_order_relaxed);
626 size_t chunk_size = (this->_chunk_size == 0) ?
size_t{1} : this->_chunk_size;
629 auto [box, consumed] = range.slice_ceil(curr_b,
chunk_size);
630 if(next.compare_exchange_weak(curr_b, curr_b + consumed,
631 std::memory_order_relaxed,
632 std::memory_order_relaxed)) {
633 if constexpr (std::is_same_v<std::invoke_result_t<F, R>,
bool>) {
690template <
typename C = DefaultClosureWrapper>
733 float alpha()
const {
return _alpha; }
738 float beta()
const {
return _beta; }
748 size_t b1 =
static_cast<size_t>(_alpha * N * W);
749 size_t b2 =
static_cast<size_t>(_beta * N * W);
755 b1 = (std::max)(b1,
size_t{1});
756 b2 = (std::max)(b2, b1 + 1);
768 template <
typename F>
770 size_t N,
size_t W, std::atomic<size_t>& next, F&& func
775 std::default_random_engine engine {std::random_device{}()};
776 std::uniform_int_distribution<size_t> dist(b1, b2);
779 size_t curr_b = next.fetch_add(
chunk_size, std::memory_order_relaxed);
782 if constexpr (std::is_same_v<std::invoke_result_t<F, size_t, size_t>,
bool>) {
783 if(func(curr_b, (std::min)(curr_b +
chunk_size, N))) {
787 func(curr_b, (std::min)(curr_b +
chunk_size, N));
790 curr_b = next.fetch_add(
chunk_size, std::memory_order_relaxed);
797 template <IndexRangeMDLike R,
typename F>
798 void loop(
const R& range,
size_t N,
size_t W, std::atomic<size_t>& next, F&& func)
const {
802 std::default_random_engine engine{std::random_device{}()};
803 std::uniform_int_distribution<size_t> dist(b1, b2);
805 size_t curr_b = next.load(std::memory_order_relaxed);
808 auto [box, consumed] = range.slice_ceil(curr_b, dist(engine));
809 if(next.compare_exchange_weak(curr_b, curr_b + consumed,
810 std::memory_order_relaxed,
811 std::memory_order_relaxed)) {
812 if constexpr (std::is_same_v<std::invoke_result_t<F, R>,
bool>) {
826 float _alpha {0.01f};
844concept PartitionerLike = std::derived_from<P, PartitionerBase<typename P::closure_wrapper_type>>;
class to create a default closure wrapper
Definition partitioner.hpp:51
class to create a dynamic partitioner for scheduling parallel algorithms
Definition partitioner.hpp:567
DynamicPartitioner()=default
default constructor
DynamicPartitioner(size_t sz, C &&closure)
construct a dynamic partitioner with the given chunk size and the closure
Definition partitioner.hpp:589
static constexpr PartitionerType type()
queries the partition type (dynamic)
Definition partitioner.hpp:574
DynamicPartitioner(size_t sz)
construct a dynamic partitioner with the given chunk size
Definition partitioner.hpp:584
class to create a guided partitioner for scheduling parallel algorithms
Definition partitioner.hpp:402
GuidedPartitioner(size_t sz, C &&closure)
construct a guided partitioner with the given chunk size and the closure
Definition partitioner.hpp:425
GuidedPartitioner(size_t sz)
construct a guided partitioner with the given chunk size
Definition partitioner.hpp:420
GuidedPartitioner()=default
default constructor
static constexpr PartitionerType type()
queries the partition type (guided)
Definition partitioner.hpp:409
class to derive a partitioner for scheduling parallel algorithms
Definition partitioner.hpp:125
PartitionerBase(size_t chunk_size)
construct a partitioner with the given chunk size
Definition partitioner.hpp:147
static constexpr bool is_default_wrapper_v
indicating if the given closure wrapper is a default wrapper (i.e., empty)
Definition partitioner.hpp:132
C closure_wrapper_type
the closure type
Definition partitioner.hpp:137
void chunk_size(size_t cz)
update the chunk size of this partitioner
Definition partitioner.hpp:165
const C & closure_wrapper() const
acquire an immutable access to the closure wrapper object
Definition partitioner.hpp:170
void closure_wrapper(F &&fn)
modify the closure wrapper object
Definition partitioner.hpp:181
PartitionerBase(size_t chunk_size, C &&closure_wrapper)
construct a partitioner with the given chunk size and closure wrapper
Definition partitioner.hpp:152
C & closure_wrapper()
acquire a mutable access to the closure wrapper object
Definition partitioner.hpp:175
PartitionerBase()=default
default constructor
size_t chunk_size() const
query the chunk size of this partitioner
Definition partitioner.hpp:160
class to construct a random partitioner for scheduling parallel algorithms
Definition partitioner.hpp:691
RandomPartitioner(size_t sz, C &&closure)
construct a random partitioner with the given chunk size and the closure
Definition partitioner.hpp:713
RandomPartitioner(float alpha, float beta, C &&closure)
constructs a random partitioner with the given parameters and the closure
Definition partitioner.hpp:725
std::pair< size_t, size_t > chunk_size_range(size_t N, size_t W) const
queries the range of chunk size
Definition partitioner.hpp:746
RandomPartitioner(float alpha, float beta)
constructs a random partitioner with the given parameters
Definition partitioner.hpp:720
RandomPartitioner()=default
default constructor
static constexpr PartitionerType type()
queries the partition type (random)
Definition partitioner.hpp:698
float alpha() const
queries the alpha value
Definition partitioner.hpp:733
RandomPartitioner(size_t sz)
construct a random partitioner with the given chunk size
Definition partitioner.hpp:708
float beta() const
queries the beta value
Definition partitioner.hpp:738
class to construct a static partitioner for scheduling parallel algorithms
Definition partitioner.hpp:262
StaticPartitioner()=default
default constructor
size_t adjusted_chunk_size(size_t N, size_t W, size_t w) const
queries the adjusted chunk size
Definition partitioner.hpp:295
StaticPartitioner(size_t sz)
construct a static partitioner with the given chunk size
Definition partitioner.hpp:279
StaticPartitioner(size_t sz, C &&closure)
construct a static partitioner with the given chunk size and the closure
Definition partitioner.hpp:284
static constexpr PartitionerType type()
queries the partition type (static)
Definition partitioner.hpp:269
determines if a type is a partitioner
Definition partitioner.hpp:844
taskflow namespace
Definition small_vector.hpp:20
PartitionerType
enumeration of all partitioner types
Definition partitioner.hpp:19
@ DYNAMIC
dynamic partitioner type
@ STATIC
static partitioner type
constexpr bool is_partitioner_v
determines if a type is a partitioner (variable template)
Definition partitioner.hpp:854