3#include "../cudaflow.hpp"
// Kernel template `cuda_for_each_kernel`: applies the callable `c` to
// dereferenced elements addressed relative to `first`.
//
// NOTE(review): this listing carries fused line-number residue (17, 18, ...)
// and skips original lines (23, and everything after 24), so the functor
// passed to cuda_strided_iterate is only partly visible and the closing
// braces are absent. Restore them from the real header before compiling.
//
// Template parameters:
//   I - type of `first`; it is offset with `+` and dereferenced with `*` below
//   C - type of the callable `c`
//   E - provides E::nv, E::nt and E::vt (presumably an execution-policy
//       type; confirm against its declaration)
//
// Parameters:
//   first - start of the data to visit
//   count - number of items, passed on to cuda_get_tile
//   c     - callable invoked once per visited element
17template <
typename I,
typename C,
typename E>
18__global__
void cuda_for_each_kernel(I first,
unsigned count, C c) {
// Thread index within the block and block index within the grid (x dimension).
19 auto tid = threadIdx.x;
20 auto bid = blockIdx.x;
// Tile for this block, computed from the block index, E::nv and count.
// Presumably a [begin, end) slice of the `count` items; only tile.begin is
// used in the visible lines. TODO confirm against cuda_get_tile.
21 auto tile = cuda_get_tile(bid, E::nv, count);
// Iteration helper parameterized by E::nt and E::vt; its argument list is
// truncated in this listing.
22 cuda_strided_iterate<E::nt, E::vt>(
// `j` is not declared in the visible lines; presumably it is the second
// parameter of the missing lambda header, as in cuda_for_each_index_kernel.
// The element at offset tile.begin + j from `first` is dereferenced and
// handed to the callable.
24 c(*(first + tile.begin + j));
// Kernel template `cuda_for_each_index_kernel`: applies the callable `c` to
// index values of the form first + inc * k.
//
// NOTE(review): this listing carries fused line-number residue (31, 32, ...)
// and is cut off after original line 38, so the remaining arguments of
// cuda_strided_iterate and all closing braces are absent. Restore them from
// the real header before compiling.
//
// Template parameters:
//   I - type of the index values `first` and `inc`
//   C - type of the callable `c`
//   E - provides E::nv, E::nt and E::vt (presumably an execution-policy
//       type; confirm against its declaration)
//
// Parameters:
//   first - first index value
//   inc   - increment between consecutive index values
//   count - number of indices, passed on to cuda_get_tile
//   c     - callable invoked once per index value
31template <
typename I,
typename C,
typename E>
32__global__
void cuda_for_each_index_kernel(I first, I inc,
unsigned count, C c) {
// Thread index within the block and block index within the grid (x dimension).
33 auto tid = threadIdx.x;
34 auto bid = blockIdx.x;
// Tile for this block, computed from the block index, E::nv and count.
// Only tile.begin is used in the visible lines.
35 auto tile = cuda_get_tile(bid, E::nv, count);
// Iteration helper parameterized by E::nt and E::vt; it receives the device
// lambda below as its first argument (further arguments are not visible).
36 cuda_strided_iterate<E::nt, E::vt>(
// Device lambda capturing by value. Its first parameter is unnamed and
// unused; `j` is the second parameter.
37 [=]__device__(
auto,
auto j) {
// Map the tile-relative offset j to an index value and pass it to the callable.
38 c(first + inc*(tile.begin+j));
// Fragment of a member-function template of a class template: the outer
// template header carries the class parameters Creator/Deleter, the inner
// one the function parameters I/C/E.
//
// NOTE(review): the signature (original lines 53-54) and the head of the call
// (original lines 56-57) are missing from this listing. The listing's own
// cross-reference places `cudaTask for_each(I first, I last, C callable)` at
// for_each.hpp:53, which is presumably this definition; confirm against the
// real header.
51template <
typename Creator,
typename Deleter>
52template <
typename I,
typename C,
typename E>
// Number of elements between first and last.
// NOTE(review): the std::distance result is stored in `unsigned`, so a range
// longer than the unsigned maximum would be truncated; confirm this is acceptable.
55 unsigned count = std::distance(first, last);
// Trailing arguments of a call whose head is not visible (presumably
// `kernel(g, b, s, f, args...)`; TODO confirm): E::num_blocks(count),
// E::nt, the literal 0, then the kernel instantiation and its
// arguments (first, count, c).
58 E::num_blocks(count), E::nt, 0,
59 detail::cuda_for_each_kernel<I, C, E>, first, count, c
// Fragment of a member-function template of a class template: the outer
// template header carries the class parameters Creator/Deleter, the inner
// one the function parameters I/C/E.
//
// NOTE(review): the signature (original lines 66-67) and the head of the call
// (original lines 69-70) are missing from this listing. The listing's own
// cross-reference places `void for_each(cudaTask task, I first, I last,
// C callable)` at for_each.hpp:66, which is presumably this definition, i.e.
// the variant that updates an existing task; confirm against the real header.
64template <
typename Creator,
typename Deleter>
65template <
typename I,
typename C,
typename E>
// Number of elements between first and last.
// NOTE(review): the std::distance result is stored in `unsigned`, so a range
// longer than the unsigned maximum would be truncated; confirm this is acceptable.
68 unsigned count = std::distance(first, last);
// Trailing arguments of a call whose head is not visible (presumably
// `kernel(task, g, b, shm, f, args...)`, with `task` on the missing line;
// TODO confirm): E::num_blocks(count), E::nt, the literal 0, then
// the kernel instantiation and its arguments (first, count, c).
71 E::num_blocks(count), E::nt, 0,
72 detail::cuda_for_each_kernel<I, C, E>, first, count, c
// Fragment of a member-function template of a class template: the outer
// template header carries the class parameters Creator/Deleter, the inner
// one the function parameters I/C/E.
//
// NOTE(review): the signature (original lines 79-80) and the head of the call
// (original lines 82-83) are missing from this listing. The listing's own
// cross-reference places `cudaTask for_each_index(I first, I last, I step,
// C callable)` at for_each.hpp:79, which is presumably this definition. The
// step parameter is evidently spelled `inc` in the body; confirm against the
// real header.
77template <
typename Creator,
typename Deleter>
78template <
typename I,
typename C,
typename E>
// Iteration count obtained from the three-argument `distance` helper called
// with first, last and inc; the result is stored in `unsigned`.
// NOTE(review): the listing's cross-reference gives the helper's return type
// as size_t, so this assignment narrows; confirm this is acceptable.
81 unsigned count =
distance(first, last, inc);
// Trailing arguments of a call whose head is not visible (presumably
// `kernel(g, b, s, f, args...)`; TODO confirm): E::num_blocks(count),
// E::nt, the literal 0, then the kernel instantiation and its
// arguments (first, inc, count, c).
84 E::num_blocks(count), E::nt, 0,
85 detail::cuda_for_each_index_kernel<I, C, E>, first, inc, count, c
// Fragment of a member-function template of a class template: the outer
// template header carries the class parameters Creator/Deleter, the inner
// one the function parameters I/C/E.
//
// NOTE(review): the signature (original lines 92-93) and the head of the call
// (original lines 95-96) are missing from this listing. The listing's own
// cross-reference places `void for_each_index(cudaTask task, I first, I last,
// I step, C callable)` at for_each.hpp:92, which is presumably this
// definition, i.e. the variant that updates an existing task; confirm against
// the real header.
90template <
typename Creator,
typename Deleter>
91template <
typename I,
typename C,
typename E>
// Iteration count obtained from the three-argument `distance` helper called
// with first, last and inc; the result is stored in `unsigned`.
// NOTE(review): the listing's cross-reference gives the helper's return type
// as size_t, so this assignment narrows; confirm this is acceptable.
94 unsigned count =
distance(first, last, inc);
// Trailing arguments of a call whose head is not visible (presumably
// `kernel(task, g, b, shm, f, args...)`, with `task` on the missing line;
// TODO confirm): E::num_blocks(count), E::nt, the literal 0, then
// the kernel instantiation and its arguments (first, inc, count, c).
97 E::num_blocks(count), E::nt, 0,
98 detail::cuda_for_each_index_kernel<I, C, E>, first, inc, count, c
cudaTask for_each(I first, I last, C callable)
applies a callable to each dereferenced element of the data array
Definition for_each.hpp:53
cudaTask kernel(dim3 g, dim3 b, size_t s, F f, ArgsT... args)
creates a kernel task
Definition cuda_graph.hpp:1010
cudaTask for_each_index(I first, I last, I step, C callable)
applies a callable to each index in the range with the step size
Definition for_each.hpp:79
void kernel(cudaTask task, dim3 g, dim3 b, size_t shm, F f, ArgsT... args)
updates parameters of a kernel task
Definition cuda_graph_exec.hpp:279
void for_each_index(cudaTask task, I first, I last, I step, C callable)
updates parameters of a for_each_index kernel task created from the CUDA graph of *this
Definition for_each.hpp:92
void for_each(cudaTask task, I first, I last, C callable)
updates parameters of a for_each kernel task created from the CUDA graph of *this
Definition for_each.hpp:66
class to create a task handle of a CUDA Graph node
Definition cuda_graph.hpp:315
taskflow namespace
Definition small_vector.hpp:20
constexpr size_t distance(B beg, E end, S step)
calculates the number of iterations in the given index range
Definition iterator.hpp:73