tf::FlowBuilder class

class to build a task dependency graph

The class provides essential methods to construct a task dependency graph from which tf::Taskflow and tf::Subflow are derived.

Derived classes

class Subflow
class to construct a subflow graph from the execution of a dynamic task
class Taskflow
class to create a taskflow object

Constructors, destructors, conversion operators

FlowBuilder(Graph& graph)
constructs a flow builder with a graph

Public functions

template<typename C, std::enable_if_t<is_static_task_v<C>, void>* = nullptr>
auto emplace(C&& callable) -> Task
creates a static task
template<typename C, std::enable_if_t<is_runtime_task_v<C>, void>* = nullptr>
auto emplace(C&& callable) -> Task
creates a runtime task
template<typename C, std::enable_if_t<is_subflow_task_v<C>, void>* = nullptr>
auto emplace(C&& callable) -> Task
creates a dynamic task
template<typename C, std::enable_if_t<is_condition_task_v<C>, void>* = nullptr>
auto emplace(C&& callable) -> Task
creates a condition task
template<typename C, std::enable_if_t<is_multi_condition_task_v<C>, void>* = nullptr>
auto emplace(C&& callable) -> Task
creates a multi-condition task
template<typename... C, std::enable_if_t<(sizeof...(C)>1), void>* = nullptr>
auto emplace(C && ... callables) -> auto
creates multiple tasks from a list of callable objects
void erase(Task task)
removes a task from a taskflow
template<typename T>
auto composed_of(T& object) -> Task
creates a module task for the target object
auto placeholder() -> Task
creates a placeholder task
void linearize(std::vector<Task>& tasks)
adds adjacent dependency links to a linear list of tasks
void linearize(std::initializer_list<Task> tasks)
adds adjacent dependency links to a linear list of tasks
template<typename B, typename E, typename C, typename P = DefaultPartitioner>
auto for_each(B first, E last, C callable, P part = P()) -> Task
constructs an STL-styled parallel-for task
template<typename B, typename E, typename S, typename C, typename P = DefaultPartitioner>
auto for_each_index(B first, E last, S step, C callable, P part = P()) -> Task
constructs an index-based parallel-for task
template<typename R, typename C, typename P = DefaultPartitioner>
auto for_each_index(R range, C callable, P part = P()) -> Task
constructs an index range-based parallel-for task
template<typename B, typename E, typename O, typename C, typename P = DefaultPartitioner, std::enable_if_t<is_partitioner_v<std::decay_t<P>>, void>* = nullptr>
auto transform(B first1, E last1, O d_first, C c, P part = P()) -> Task
constructs a parallel-transform task
template<typename B1, typename E1, typename B2, typename O, typename C, typename P = DefaultPartitioner, std::enable_if_t<!is_partitioner_v<std::decay_t<C>>, void>* = nullptr>
auto transform(B1 first1, E1 last1, B2 first2, O d_first, C c, P part = P()) -> Task
constructs a parallel-transform task
template<typename B, typename E, typename T, typename O, typename P = DefaultPartitioner>
auto reduce(B first, E last, T& init, O bop, P part = P()) -> Task
constructs an STL-styled parallel-reduction task
template<typename R, typename T, typename L, typename G, typename P = DefaultPartitioner>
auto reduce_by_index(R range, T& init, L lop, G gop, P part = P()) -> Task
constructs an index range-based parallel-reduction task
template<typename B, typename E, typename T, typename BOP, typename UOP, typename P = DefaultPartitioner, std::enable_if_t<is_partitioner_v<std::decay_t<P>>, void>* = nullptr>
auto transform_reduce(B first, E last, T& init, BOP bop, UOP uop, P part = P()) -> Task
constructs an STL-styled parallel transform-reduce task
template<typename B1, typename E1, typename B2, typename T, typename BOP_R, typename BOP_T, typename P = DefaultPartitioner, std::enable_if_t<!is_partitioner_v<std::decay_t<BOP_T>>, void>* = nullptr>
auto transform_reduce(B1 first1, E1 last1, B2 first2, T& init, BOP_R bop_r, BOP_T bop_t, P part = P()) -> Task
constructs an STL-styled parallel transform-reduce task
template<typename B, typename E, typename D, typename BOP>
auto inclusive_scan(B first, E last, D d_first, BOP bop) -> Task
creates an STL-styled parallel inclusive-scan task
template<typename B, typename E, typename D, typename BOP, typename T>
auto inclusive_scan(B first, E last, D d_first, BOP bop, T init) -> Task
creates an STL-styled parallel inclusive-scan task with an initial value
template<typename B, typename E, typename D, typename T, typename BOP>
auto exclusive_scan(B first, E last, D d_first, T init, BOP bop) -> Task
creates an STL-styled parallel exclusive-scan task
template<typename B, typename E, typename D, typename BOP, typename UOP>
auto transform_inclusive_scan(B first, E last, D d_first, BOP bop, UOP uop) -> Task
creates an STL-styled parallel transform-inclusive scan task
template<typename B, typename E, typename D, typename BOP, typename UOP, typename T>
auto transform_inclusive_scan(B first, E last, D d_first, BOP bop, UOP uop, T init) -> Task
creates an STL-styled parallel transform-inclusive scan task
template<typename B, typename E, typename D, typename T, typename BOP, typename UOP>
auto transform_exclusive_scan(B first, E last, D d_first, T init, BOP bop, UOP uop) -> Task
creates an STL-styled parallel transform-exclusive scan task
template<typename B, typename E, typename T, typename UOP, typename P = DefaultPartitioner>
auto find_if(B first, E last, T& result, UOP predicate, P part = P()) -> Task
constructs a task to perform STL-styled find-if algorithm
template<typename B, typename E, typename T, typename UOP, typename P = DefaultPartitioner>
auto find_if_not(B first, E last, T& result, UOP predicate, P part = P()) -> Task
constructs a task to perform STL-styled find-if-not algorithm
template<typename B, typename E, typename T, typename C, typename P>
auto min_element(B first, E last, T& result, C comp, P part) -> Task
constructs a task to perform STL-styled min-element algorithm
template<typename B, typename E, typename T, typename C, typename P>
auto max_element(B first, E last, T& result, C comp, P part) -> Task
constructs a task to perform STL-styled max-element algorithm
template<typename B, typename E, typename C>
auto sort(B first, E last, C cmp) -> Task
constructs a dynamic task to perform STL-styled parallel sort
template<typename B, typename E>
auto sort(B first, E last) -> Task
constructs a dynamic task to perform STL-styled parallel sort using the std::less<T> comparator, where T is the element type

Protected variables

Graph& _graph
associated graph object

Function documentation

template<typename C, std::enable_if_t<is_static_task_v<C>, void>* = nullptr>
Task tf::FlowBuilder::emplace(C&& callable)

creates a static task

Template parameters
C callable type constructible from std::function<void()>
Parameters
callable callable to construct a static task
Returns a tf::Task handle

The following example creates a static task.

tf::Task static_task = taskflow.emplace([](){});

Please refer to Static Tasking for details.

template<typename C, std::enable_if_t<is_runtime_task_v<C>, void>* = nullptr>
Task tf::FlowBuilder::emplace(C&& callable)

creates a runtime task

Template parameters
C callable type constructible from std::function<void(tf::Runtime&)>
Parameters
callable callable to construct a runtime task
Returns a tf::Task handle

The following example creates a runtime task.

tf::Task static_task = taskflow.emplace([](tf::Runtime&){});

Please refer to Interact with the Runtime for details.

template<typename C, std::enable_if_t<is_subflow_task_v<C>, void>* = nullptr>
Task tf::FlowBuilder::emplace(C&& callable)

creates a dynamic task

Template parameters
C callable type constructible from std::function<void(tf::Subflow&)>
Parameters
callable callable to construct a dynamic task
Returns a tf::Task handle

The following example creates a dynamic task (tf::Subflow) that spawns two static tasks.

tf::Task dynamic_task = taskflow.emplace([](tf::Subflow& sf){
  tf::Task static_task1 = sf.emplace([](){});
  tf::Task static_task2 = sf.emplace([](){});
});

Please refer to Subflow Tasking for details.

template<typename C, std::enable_if_t<is_condition_task_v<C>, void>* = nullptr>
Task tf::FlowBuilder::emplace(C&& callable)

creates a condition task

Template parameters
C callable type constructible from std::function<int()>
Parameters
callable callable to construct a condition task
Returns a tf::Task handle

The following example creates an if-else block using one condition task and three static tasks.

tf::Taskflow taskflow;

auto [init, cond, yes, no] = taskflow.emplace(
  [] () { },
  [] () { return 0; },
  [] () { std::cout << "yes\n"; },
  [] () { std::cout << "no\n"; }
);

// executes yes if cond returns 0, or no if cond returns 1
cond.precede(yes, no);
cond.succeed(init);

Please refer to Conditional Tasking for details.

template<typename C, std::enable_if_t<is_multi_condition_task_v<C>, void>* = nullptr>
Task tf::FlowBuilder::emplace(C&& callable)

creates a multi-condition task

Template parameters
C callable type constructible from std::function<tf::SmallVector<int>()>
Parameters
callable callable to construct a multi-condition task
Returns a tf::Task handle

The following example creates a multi-condition task that selectively jumps to two successor tasks.

tf::Taskflow taskflow;

auto [init, cond, branch1, branch2, branch3] = taskflow.emplace(
  [] () { },
  [] () { return tf::SmallVector{0, 2}; },
  [] () { std::cout << "branch1\n"; },
  [] () { std::cout << "branch2\n"; },
  [] () { std::cout << "branch3\n"; }
);

// executes branch1 and branch3 when cond returns 0 and 2
cond.precede(branch1, branch2, branch3);
cond.succeed(init);

Please refer to Conditional Tasking for details.

template<typename... C, std::enable_if_t<(sizeof...(C)>1), void>* = nullptr>
auto tf::FlowBuilder::emplace(C && ... callables)

creates multiple tasks from a list of callable objects

Template parameters
C callable types
Parameters
callables one or multiple callable objects constructible from each task category
Returns a tf::Task handle

The method returns a tuple of tasks each corresponding to the given callable target. You can use structured binding to get the return tasks one by one. The following example creates four static tasks and assign them to A, B, C, and D using structured binding.

auto [A, B, C, D] = taskflow.emplace(
  [] () { std::cout << "A"; },
  [] () { std::cout << "B"; },
  [] () { std::cout << "C"; },
  [] () { std::cout << "D"; }
);

void tf::FlowBuilder::erase(Task task)

removes a task from a taskflow

Parameters
task task to remove

Removes a task and its input and output dependencies from the graph associated with the flow builder. If the task does not belong to the graph, nothing will happen.

tf::Task A = taskflow.emplace([](){ std::cout << "A"; });
tf::Task B = taskflow.emplace([](){ std::cout << "B"; });
tf::Task C = taskflow.emplace([](){ std::cout << "C"; });
tf::Task D = taskflow.emplace([](){ std::cout << "D"; });
A.precede(B, C, D);

// erase A from the taskflow and its dependencies to B, C, and D
taskflow.erase(A);

template<typename T>
Task tf::FlowBuilder::composed_of(T& object)

creates a module task for the target object

Template parameters
T target object type
Parameters
object a custom object that defines the method T::graph()
Returns a tf::Task handle

The example below demonstrates a taskflow composition using the composed_of method.

tf::Taskflow t1, t2;
t1.emplace([](){ std::cout << "t1"; });

// t2 is partially composed of t1
tf::Task comp = t2.composed_of(t1);
tf::Task init = t2.emplace([](){ std::cout << "t2"; });
init.precede(comp);

The taskflow object t2 is composed of another taskflow object t1, preceded by another static task init. When taskflow t2 is submitted to an executor, init will run first and then comp which spawns its definition in taskflow t1.

The target object being composed must define the method T::graph() that returns a reference to a graph object of type tf::Graph such that it can interact with the executor. For example:

// custom struct
struct MyObj {
  tf::Graph graph;
  MyObj() {
    tf::FlowBuilder builder(graph);
    tf::Task task = builder.emplace([](){
      std::cout << "a task\n";  // static task
    });
  }
  Graph& graph() { return graph; }
};

MyObj obj;
tf::Task comp = taskflow.composed_of(obj);

Please refer to Composable Tasking for details.

Task tf::FlowBuilder::placeholder()

creates a placeholder task

Returns a tf::Task handle

A placeholder task maps to a node in the taskflow graph, but it does not have any callable work assigned yet. A placeholder task is different from an empty task handle that does not point to any node in a graph.

// create a placeholder task with no callable target assigned
tf::Task placeholder = taskflow.placeholder();
assert(placeholder.empty() == false && placeholder.has_work() == false);

// create an empty task handle
tf::Task task;
assert(task.empty() == true);

// assign the task handle to the placeholder task
task = placeholder;
assert(task.empty() == false && task.has_work() == false);

void tf::FlowBuilder::linearize(std::vector<Task>& tasks)

adds adjacent dependency links to a linear list of tasks

Parameters
tasks a vector of tasks

This member function creates linear dependencies over a vector of tasks.

tf::Task A = taskflow.emplace([](){ std::cout << "A"; });
tf::Task B = taskflow.emplace([](){ std::cout << "B"; });
tf::Task C = taskflow.emplace([](){ std::cout << "C"; });
tf::Task D = taskflow.emplace([](){ std::cout << "D"; });
std::vector<tf::Task> tasks {A, B, C, D}
taskflow.linearize(tasks);  // A->B->C->D

void tf::FlowBuilder::linearize(std::initializer_list<Task> tasks)

adds adjacent dependency links to a linear list of tasks

Parameters
tasks an initializer list of tasks

This member function creates linear dependencies over a list of tasks.

tf::Task A = taskflow.emplace([](){ std::cout << "A"; });
tf::Task B = taskflow.emplace([](){ std::cout << "B"; });
tf::Task C = taskflow.emplace([](){ std::cout << "C"; });
tf::Task D = taskflow.emplace([](){ std::cout << "D"; });
taskflow.linearize({A, B, C, D});  // A->B->C->D

template<typename B, typename E, typename C, typename P = DefaultPartitioner>
Task tf::FlowBuilder::for_each(B first, E last, C callable, P part = P())

constructs an STL-styled parallel-for task

Template parameters
B beginning iterator type
E ending iterator type
C callable type
P partitioner type (default tf::DefaultPartitioner)
Parameters
first iterator to the beginning (inclusive)
last iterator to the end (exclusive)
callable callable object to apply to the dereferenced iterator
part partitioning algorithm to schedule parallel iterations
Returns a tf::Task handle

The task spawns asynchronous tasks that applies the callable object to each object obtained by dereferencing every iterator in the range [first, last). This method is equivalent to the parallel execution of the following loop:

for(auto itr=first; itr!=last; itr++) {
  callable(*itr);
}

Iterators can be made stateful by using std::reference_wrapper The callable needs to take a single argument of the dereferenced iterator type.

Please refer to Parallel Iterations for details.

template<typename B, typename E, typename S, typename C, typename P = DefaultPartitioner>
Task tf::FlowBuilder::for_each_index(B first, E last, S step, C callable, P part = P())

constructs an index-based parallel-for task

Template parameters
B beginning index type (must be integral)
E ending index type (must be integral)
S step type (must be integral)
C callable type
P partitioner type (default tf::DefaultPartitioner)
Parameters
first index of the beginning (inclusive)
last index of the end (exclusive)
step step size
callable callable object to apply to each valid index
part partitioning algorithm to schedule parallel iterations
Returns a tf::Task handle

The task spawns asynchronous tasks that applies the callable object to each index in the range [first, last) with the step size. This method is equivalent to the parallel execution of the following loop:

// case 1: step size is positive
for(auto i=first; i<last; i+=step) {
  callable(i);
}

// case 2: step size is negative
for(auto i=first, i>last; i+=step) {
  callable(i);
}

Iterators can be made stateful by using std::reference_wrapper The callable needs to take a single argument of the integral index type.

Please refer to Parallel Iterations for details.

template<typename R, typename C, typename P = DefaultPartitioner>
Task tf::FlowBuilder::for_each_index(R range, C callable, P part = P())

constructs an index range-based parallel-for task

Template parameters
R index range type (tf::IndexRange)
C callable type
P partitioner type (default tf::DefaultPartitioner)
Parameters
range index range
callable callable object to apply to each valid index
part partitioning algorithm to schedule parallel iterations
Returns a tf::Task handle

The task spawns asynchronous tasks that applies the callable object to in the range [first, last) with the step size.

// [0, 17) with a step size of 2 using tf::IndexRange
tf::IndexRange<int> range(0, 17, 2);

// parallelize the sequence [0, 2, 4, 6, 8, 10, 12, 14, 16]
taskflow.for_each_index(range, [](tf::IndexRange<int> range) {
  // iterate each index in the subrange
  for(int i=range.begin(); i<range.end(); i+=range.step_size()) {
    printf("iterate %d\n", i);
  }
});

executor.run(taskflow).wait();

The callable needs to take a single argument of type tf::IndexRange.

Please refer to Parallel Iterations for details.

template<typename B, typename E, typename O, typename C, typename P = DefaultPartitioner, std::enable_if_t<is_partitioner_v<std::decay_t<P>>, void>* = nullptr>
Task tf::FlowBuilder::transform(B first1, E last1, O d_first, C c, P part = P())

constructs a parallel-transform task

Template parameters
B beginning input iterator type
E ending input iterator type
O output iterator type
C callable type
P partitioner type (default tf::DefaultPartitioner)
Parameters
first1 iterator to the beginning of the first range
last1 iterator to the end of the first range
d_first iterator to the beginning of the output range
c an unary callable to apply to dereferenced input elements
part partitioning algorithm to schedule parallel iterations
Returns a tf::Task handle

The task spawns asynchronous tasks that applies the callable object to an input range and stores the result in another output range. This method is equivalent to the parallel execution of the following loop:

while (first1 != last1) {
  *d_first++ = c(*first1++);
}

Iterators can be made stateful by using std::reference_wrapper The callable needs to take a single argument of the dereferenced iterator type.

Please refer to Parallel Transforms for details.

template<typename B1, typename E1, typename B2, typename O, typename C, typename P = DefaultPartitioner, std::enable_if_t<!is_partitioner_v<std::decay_t<C>>, void>* = nullptr>
Task tf::FlowBuilder::transform(B1 first1, E1 last1, B2 first2, O d_first, C c, P part = P())

constructs a parallel-transform task

Template parameters
B1 beginning input iterator type for the first input range
E1 ending input iterator type for the first input range
B2 beginning input iterator type for the first second range
O output iterator type
C callable type
P partitioner type (default tf::DefaultPartitioner)
Parameters
first1 iterator to the beginning of the first input range
last1 iterator to the end of the first input range
first2 iterator to the beginning of the second input range
d_first iterator to the beginning of the output range
c a binary operator to apply to dereferenced input elements
part partitioning algorithm to schedule parallel iterations
Returns a tf::Task handle

The task spawns asynchronous tasks that applies the callable object to two input ranges and stores the result in another output range. This method is equivalent to the parallel execution of the following loop:

while (first1 != last1) {
  *d_first++ = c(*first1++, *first2++);
}

Iterators can be made stateful by using std::reference_wrapper The callable needs to take two arguments of dereferenced elements from the two input ranges.

Please refer to Parallel Transforms for details.

template<typename B, typename E, typename T, typename O, typename P = DefaultPartitioner>
Task tf::FlowBuilder::reduce(B first, E last, T& init, O bop, P part = P())

constructs an STL-styled parallel-reduction task

Template parameters
B beginning iterator type
E ending iterator type
T result type
O binary reducer type
P partitioner type (default tf::DefaultPartitioner)
Parameters
first iterator to the beginning (inclusive)
last iterator to the end (exclusive)
init initial value of the reduction and the storage for the reduced result
bop binary operator that will be applied
part partitioning algorithm to schedule parallel iterations
Returns a tf::Task handle

The task spawns asynchronous tasks to perform parallel reduction over init and the elements in the range [first, last). The reduced result is store in init. This method is equivalent to the parallel execution of the following loop:

for(auto itr=first; itr!=last; itr++) {
  init = bop(init, *itr);
}

Iterators can be made stateful by using std::reference_wrapper

Please refer to Parallel Reduction for details.

template<typename R, typename T, typename L, typename G, typename P = DefaultPartitioner>
Task tf::FlowBuilder::reduce_by_index(R range, T& init, L lop, G gop, P part = P())

constructs an index range-based parallel-reduction task

Template parameters
R index range type (tf::IndexRange)
T result type
L local reducer type
G global reducer type
P partitioner type (default tf::DefaultPartitioner)
Parameters
range index range
init initial value of the reduction and the storage for the reduced result
lop binary operator that will be applied locally per worker
gop binary operator that will be applied globally among worker
part partitioning algorithm to schedule parallel iterations
Returns a tf::Task handle

The task spawns asynchronous tasks to perform parallel reduction over a range with init. The reduced result is store in init. Unlike the iterator-based reduction, index range-based reduction is particularly useful for applications that benefit from SIMD optimizations or other range-based processing strategies.

const size_t N = 1000000;
std::vector<int> data(N);  // uninitialized data vector
int res = 1;               // res will participate in the reduction

taskflow.reduce_by_index(
  tf::IndexRange<size_t>(0, N, 1),
  // final result
  res,
  // local reducer
  [&](tf::IndexRange<size_t> subrange, std::optional<int> running_total) -> int {
    int residual = running_total ? *running_total : 0.0;
    for(size_t i=subrange.begin(); i<subrange.end(); i+=subrange.step_size()) {
      data[i] = 1.0;
      residual += data[i];
    }
    printf("partial sum = %lf\n", residual);
    return residual;
  },
  // global reducer
  std::plus<int>()
);
executor.run(taskflow).wait();
assert(res = N + 1);

Range can be made stateful by using std::reference_wrapper.

Please refer to Parallel Reduction for details.

template<typename B, typename E, typename T, typename BOP, typename UOP, typename P = DefaultPartitioner, std::enable_if_t<is_partitioner_v<std::decay_t<P>>, void>* = nullptr>
Task tf::FlowBuilder::transform_reduce(B first, E last, T& init, BOP bop, UOP uop, P part = P())

constructs an STL-styled parallel transform-reduce task

Template parameters
B beginning iterator type
E ending iterator type
T result type
BOP binary reducer type
UOP unary transformation type
P partitioner type (default tf::DefaultPartitioner)
Parameters
first iterator to the beginning (inclusive)
last iterator to the end (exclusive)
init initial value of the reduction and the storage for the reduced result
bop binary operator that will be applied in unspecified order to the results of uop
uop unary operator that will be applied to transform each element in the range to the result type
part partitioning algorithm to schedule parallel iterations
Returns a tf::Task handle

The task spawns asynchronous tasks to perform parallel reduction over init and the transformed elements in the range [first, last). The reduced result is store in init. This method is equivalent to the parallel execution of the following loop:

for(auto itr=first; itr!=last; itr++) {
  init = bop(init, uop(*itr));
}

Iterators can be made stateful by using std::reference_wrapper

Please refer to Parallel Reduction for details.

template<typename B1, typename E1, typename B2, typename T, typename BOP_R, typename BOP_T, typename P = DefaultPartitioner, std::enable_if_t<!is_partitioner_v<std::decay_t<BOP_T>>, void>* = nullptr>
Task tf::FlowBuilder::transform_reduce(B1 first1, E1 last1, B2 first2, T& init, BOP_R bop_r, BOP_T bop_t, P part = P())

constructs an STL-styled parallel transform-reduce task

Template parameters
B1 first beginning iterator type
E1 first ending iterator type
B2 second beginning iterator type
T result type
BOP_R binary reducer type
BOP_T binary transformation type
P partitioner type (default tf::DefaultPartitioner)
Parameters
first1 iterator to the beginning of the first range (inclusive)
last1 iterator to the end of the first range (exclusive)
first2 iterator to the beginning of the second range
init initial value of the reduction and the storage for the reduced result
bop_r binary operator that will be applied in unspecified order to the results of bop_t
bop_t binary operator that will be applied to transform each element in the range to the result type
part partitioning algorithm to schedule parallel iterations
Returns a tf::Task handle

The task spawns asynchronous tasks to perform parallel reduction over init and transformed elements in the range [first, last). The reduced result is store in init. This method is equivalent to the parallel execution of the following loop:

for(auto itr1=first1, itr2=first2; itr1!=last1; itr1++, itr2++) {
  init = bop_r(init, bop_t(*itr1, *itr2));
}

Iterators can be made stateful by using std::reference_wrapper

Please refer to Parallel Reduction for details.

template<typename B, typename E, typename D, typename BOP>
Task tf::FlowBuilder::inclusive_scan(B first, E last, D d_first, BOP bop)

creates an STL-styled parallel inclusive-scan task

Template parameters
B beginning iterator type
E ending iterator type
D destination iterator type
BOP summation operator type
Parameters
first start of input range
last end of input range
d_first start of output range (may be the same as input range)
bop function to perform summation

Performs the cumulative sum (aka prefix sum, aka scan) of the input range and writes the result to the output range. Each element of the output range contains the running total of all earlier elements using the given binary operator for summation.

This function generates an inclusive scan, meaning that the N-th element of the output range is the sum of the first N input elements, so the N-th input element is included.

std::vector<int> input = {1, 2, 3, 4, 5};
taskflow.inclusive_scan(
  input.begin(), input.end(), input.begin(), std::plus<int>{}
);
executor.run(taskflow).wait();

// input is {1, 3, 6, 10, 15}

Iterators can be made stateful by using std::reference_wrapper

Please refer to Parallel Scan for details.

template<typename B, typename E, typename D, typename BOP, typename T>
Task tf::FlowBuilder::inclusive_scan(B first, E last, D d_first, BOP bop, T init)

creates an STL-styled parallel inclusive-scan task with an initial value

Template parameters
B beginning iterator type
E ending iterator type
D destination iterator type
BOP summation operator type
T initial value type
Parameters
first start of input range
last end of input range
d_first start of output range (may be the same as input range)
bop function to perform summation
init initial value

Performs the cumulative sum (aka prefix sum, aka scan) of the input range and writes the result to the output range. Each element of the output range contains the running total of all earlier elements (and the initial value) using the given binary operator for summation.

This function generates an inclusive scan, meaning the N-th element of the output range is the sum of the first N input elements, so the N-th input element is included.

std::vector<int> input = {1, 2, 3, 4, 5};
taskflow.inclusive_scan(
  input.begin(), input.end(), input.begin(), std::plus<int>{}, -1
);
executor.run(taskflow).wait();

// input is {0, 2, 5, 9, 14}

Iterators can be made stateful by using std::reference_wrapper

Please refer to Parallel Scan for details.

template<typename B, typename E, typename D, typename T, typename BOP>
Task tf::FlowBuilder::exclusive_scan(B first, E last, D d_first, T init, BOP bop)

creates an STL-styled parallel exclusive-scan task

Template parameters
B beginning iterator type
E ending iterator type
D destination iterator type
T initial value type
BOP summation operator type
Parameters
first start of input range
last end of input range
d_first start of output range (may be the same as input range)
init initial value
bop function to perform summation

Performs the cumulative sum (aka prefix sum, aka scan) of the input range and writes the result to the output range. Each element of the output range contains the running total of all earlier elements (and the initial value) using the given binary operator for summation.

This function generates an exclusive scan, meaning the N-th element of the output range is the sum of the first N-1 input elements, so the N-th input element is not included.

std::vector<int> input = {1, 2, 3, 4, 5};
taskflow.exclusive_scan(
  input.begin(), input.end(), input.begin(), -1, std::plus<int>{}
);
executor.run(taskflow).wait();

// input is {-1, 0, 2, 5, 9}

Iterators can be made stateful by using std::reference_wrapper

Please refer to Parallel Scan for details.

template<typename B, typename E, typename D, typename BOP, typename UOP>
Task tf::FlowBuilder::transform_inclusive_scan(B first, E last, D d_first, BOP bop, UOP uop)

creates an STL-styled parallel transform-inclusive scan task

Template parameters
B beginning iterator type
E ending iterator type
D destination iterator type
BOP summation operator type
UOP transform operator type
Parameters
first start of input range
last end of input range
d_first start of output range (may be the same as input range)
bop function to perform summation
uop function to transform elements of the input range

Write the cumulative sum (aka prefix sum, aka scan) of the input range to the output range. Each element of the output range contains the running total of all earlier elements using uop to transform the input elements and using bop for summation.

This function generates an inclusive scan, meaning the Nth element of the output range is the sum of the first N input elements, so the Nth input element is included.

std::vector<int> input = {1, 2, 3, 4, 5};
taskflow.transform_inclusive_scan(
  input.begin(), input.end(), input.begin(), std::plus<int>{}, 
  [] (int item) { return -item; }
);
executor.run(taskflow).wait();

// input is {-1, -3, -6, -10, -15}

Iterators can be made stateful by using std::reference_wrapper

Please refer to Parallel Scan for details.

template<typename B, typename E, typename D, typename BOP, typename UOP, typename T>
Task tf::FlowBuilder::transform_inclusive_scan(B first, E last, D d_first, BOP bop, UOP uop, T init)

creates an STL-styled parallel transform-inclusive scan task

Template parameters
B beginning iterator type
E ending iterator type
D destination iterator type
BOP summation operator type
UOP transform operator type
T initial value type
Parameters
first start of input range
last end of input range
d_first start of output range (may be the same as input range)
bop function to perform summation
uop function to transform elements of the input range
init initial value

Write the cumulative sum (aka prefix sum, aka scan) of the input range to the output range. Each element of the output range contains the running total of all earlier elements (including an initial value) using uop to transform the input elements and using bop for summation.

This function generates an inclusive scan, meaning the Nth element of the output range is the sum of the first N input elements, so the Nth input element is included.

std::vector<int> input = {1, 2, 3, 4, 5};
taskflow.transform_inclusive_scan(
  input.begin(), input.end(), input.begin(), std::plus<int>{}, 
  [] (int item) { return -item; },
  -1
);
executor.run(taskflow).wait();

// input is {-2, -4, -7, -11, -16}

Iterators can be made stateful by using std::reference_wrapper

Please refer to Parallel Scan for details.

template<typename B, typename E, typename D, typename T, typename BOP, typename UOP>
Task tf::FlowBuilder::transform_exclusive_scan(B first, E last, D d_first, T init, BOP bop, UOP uop)

creates an STL-styled parallel transform-exclusive scan task

Template parameters
B beginning iterator type
E ending iterator type
D destination iterator type
T initial value type
BOP summation operator type
UOP transform operator type
Parameters
first start of input range
last end of input range
d_first start of output range (may be the same as input range)
init initial value
bop function to perform summation
uop function to transform elements of the input range

Write the cumulative sum (aka prefix sum, aka scan) of the input range to the output range. Each element of the output range contains the running total of all earlier elements (including an initial value) using uop to transform the input elements and using bop for summation.

This function generates an exclusive scan, meaning the Nth element of the output range is the sum of the first N-1 input elements, so the Nth input element is not included.

std::vector<int> input = {1, 2, 3, 4, 5};
taskflow.transform_exclusive_scan(
  input.begin(), input.end(), input.begin(), -1, std::plus<int>{},
  [](int item) { return -item; }
);
executor.run(taskflow).wait();

// input is {-1, -2, -4, -7, -11}

Iterators can be made stateful by using std::reference_wrapper

Please refer to Parallel Scan for details.

template<typename B, typename E, typename T, typename UOP, typename P = DefaultPartitioner>
Task tf::FlowBuilder::find_if(B first, E last, T& result, UOP predicate, P part = P())

constructs a task to perform STL-styled find-if algorithm

Template parameters
B beginning iterator type
E ending iterator type
T resulting iterator type
UOP unary predicate type
P partitioner type
Parameters
first start of the input range
last end of the input range
result resulting iterator to the found element in the input range
predicate unary predicate which returns true for the required element
part partitioning algorithm (default tf::DefaultPartitioner)

Returns an iterator to the first element in the range [first, last) that satisfies the given criteria (or last if there is no such iterator). This method is equivalent to the parallel execution of the following loop:

auto find_if(InputIt first, InputIt last, UnaryPredicate p) {
  for (; first != last; ++first) {
    if (predicate(*first)){
      return first;
    }
  }
  return last;
}

For example, the code below find the element that satisfies the given criteria (value plus one is equal to 23) from an input range of 10 elements:

std::vector<int> input = {1, 6, 9, 10, 22, 5, 7, 8, 9, 11};
std::vector<int>::iterator result;
taskflow.find_if(
  input.begin(), input.end(), [](int i){ return i+1 = 23; }, result
);
executor.run(taskflow).wait();
assert(*result == 22);

Iterators can be made stateful by using std::reference_wrapper

template<typename B, typename E, typename T, typename UOP, typename P = DefaultPartitioner>
Task tf::FlowBuilder::find_if_not(B first, E last, T& result, UOP predicate, P part = P())

constructs a task to perform STL-styled find-if-not algorithm

Template parameters
B beginning iterator type
E ending iterator type
T resulting iterator type
UOP unary predicate type
P partitioner type
Parameters
first start of the input range
last end of the input range
result resulting iterator to the found element in the input range
predicate unary predicate which returns false for the required element
part partitioning algorithm (default tf::DefaultPartitioner)

Returns an iterator to the first element in the range [first, last) that satisfies the given criteria (or last if there is no such iterator). This method is equivalent to the parallel execution of the following loop:

auto find_if(InputIt first, InputIt last, UnaryPredicate p) {
  for (; first != last; ++first) {
    if (!predicate(*first)){
      return first;
    }
  }
  return last;
}

For example, the code below find the element that satisfies the given criteria (value is not equal to 1) from an input range of 10 elements:

std::vector<int> input = {1, 1, 1, 1, 22, 1, 1, 1, 1, 1};
std::vector<int>::iterator result;
taskflow.find_if_not(
  input.begin(), input.end(), [](int i){ return i == 1; }, result
);
executor.run(taskflow).wait();
assert(*result == 22);

Iterators can be made stateful by using std::reference_wrapper

template<typename B, typename E, typename T, typename C, typename P>
Task tf::FlowBuilder::min_element(B first, E last, T& result, C comp, P part)

constructs a task to perform STL-styled min-element algorithm

Template parameters
B beginning iterator type
E ending iterator type
T resulting iterator type
C comparator type
P partitioner type
Parameters
first start of the input range
last end of the input range
result resulting iterator to the found element in the input range
comp comparison function object
part partitioning algorithm (default tf::DefaultPartitioner)

Finds the smallest element in the [first, last) using the given comparison function object. The iterator to that smallest element is stored in result. This method is equivalent to the parallel execution of the following loop:

if (first == last) {
  return last;
}
auto smallest = first;
++first;
for (; first != last; ++first) {
  if (comp(*first, *smallest)) {
    smallest = first;
  }
}
return smallest;

For example, the code below find the smallest element from an input range of 10 elements.

std::vector<int> input = {1, 1, 1, 1, 1, -1, 1, 1, 1, 1};
std::vector<int>::iterator result;
taskflow.min_element(
  input.begin(), input.end(), std::less<int>(), result
);
executor.run(taskflow).wait();
assert(*result == -1);

Iterators can be made stateful by using std::reference_wrapper

template<typename B, typename E, typename T, typename C, typename P>
Task tf::FlowBuilder::max_element(B first, E last, T& result, C comp, P part)

constructs a task to perform STL-styled max-element algorithm

Template parameters
B beginning iterator type
E ending iterator type
T resulting iterator type
C comparator type
P partitioner type
Parameters
first start of the input range
last end of the input range
result resulting iterator to the found element in the input range
comp comparison function object
part partitioning algorithm (default tf::DefaultPartitioner)

Finds the largest element in the [first, last) using the given comparison function object. The iterator to that largest element is stored in result. This method is equivalent to the parallel execution of the following loop:

if (first == last){
  return last;
}
auto largest = first;
++first;
for (; first != last; ++first) {
  if (comp(*largest, *first)) {
    largest = first;
  }
}
return largest;

For example, the code below find the largest element from an input range of 10 elements.

std::vector<int> input = {1, 1, 1, 1, 1, 2, 1, 1, 1, 1};
std::vector<int>::iterator result;
taskflow.max_element(
  input.begin(), input.end(), std::less<int>(), result
);
executor.run(taskflow).wait();
assert(*result == 2);

Iterators can be made stateful by using std::reference_wrapper

template<typename B, typename E, typename C>
Task tf::FlowBuilder::sort(B first, E last, C cmp)

constructs a dynamic task to perform STL-styled parallel sort

Template parameters
B beginning iterator type (random-accessible)
E ending iterator type (random-accessible)
C comparator type
Parameters
first iterator to the beginning (inclusive)
last iterator to the end (exclusive)
cmp comparison operator

The task spawns asynchronous tasks to sort elements in the range [first, last) in parallel.

Iterators can be made stateful by using std::reference_wrapper

Please refer to Parallel Sort for details.

template<typename B, typename E>
Task tf::FlowBuilder::sort(B first, E last)

constructs a dynamic task to perform STL-styled parallel sort using the std::less<T> comparator, where T is the element type

Template parameters
B beginning iterator type (random-accessible)
E ending iterator type (random-accessible)
Parameters
first iterator to the beginning (inclusive)
last iterator to the end (exclusive)

The task spawns asynchronous tasks to parallel sort elements in the range [first, last) using the std::less<T> comparator, where T is the dereferenced iterator type.

Iterators can be made stateful by using std::reference_wrapper

Please refer to Parallel Sort for details.