Loading...
Searching...
No Matches
for_each.hpp
1#pragma once
2
3#include "../cudaflow.hpp"
4
9
10namespace tf {
11
12namespace detail {
13
/**
 * @brief kernel that invokes a callable on each dereferenced element
 *
 * The block looks up its tile with cuda_get_tile(blockIdx.x, E::nv, count)
 * and lets cuda_strided_iterate<E::nt, E::vt> drive a visitor over
 * tile.count() offsets; for every offset the visitor calls
 * c(*(first + tile.begin + offset)).
 *
 * @tparam I iterator type (must support `first + n` and dereference)
 * @tparam C callable type
 * @tparam E execution policy supplying E::nv, E::nt and E::vt
 *           (presumably values-per-block, threads-per-block and
 *           values-per-thread - confirm against the policy definition)
 *
 * @param first iterator to the beginning of the range
 * @param count total number of elements in the range
 * @param c callable applied to each dereferenced element
 */
template <typename I, typename C, typename E>
__global__ void cuda_for_each_kernel(I first, unsigned count, C c) {
  auto block_id = blockIdx.x;
  auto thread_id = threadIdx.x;

  // portion of the range assigned to this block
  auto tile = cuda_get_tile(block_id, E::nv, count);

  // visitor: the first argument is ignored, the second is the offset
  // relative to the beginning of this block's tile
  auto visit = [=](auto, auto offset) {
    c(*(first + tile.begin + offset));
  };

  cuda_strided_iterate<E::nt, E::vt>(visit, thread_id, tile.count());
}
29
/**
 * @brief kernel that invokes a callable on each index of a stepped range
 *
 * The block looks up its tile with cuda_get_tile(blockIdx.x, E::nv, count)
 * and lets cuda_strided_iterate<E::nt, E::vt> drive a visitor over
 * tile.count() offsets; for every offset the visitor calls
 * c(first + inc*(tile.begin + offset)).
 *
 * @tparam I index type (used for both the starting index and the step)
 * @tparam C callable type
 * @tparam E execution policy supplying E::nv, E::nt and E::vt
 *           (presumably values-per-block, threads-per-block and
 *           values-per-thread - confirm against the policy definition)
 *
 * @param first starting index
 * @param inc step between two consecutive indices
 * @param count total number of iterations
 * @param c callable applied to each generated index
 */
template <typename I, typename C, typename E>
__global__ void cuda_for_each_index_kernel(I first, I inc, unsigned count, C c) {
  auto block_id = blockIdx.x;
  auto thread_id = threadIdx.x;

  // portion of the iteration space assigned to this block
  auto tile = cuda_get_tile(block_id, E::nv, count);

  // visitor: the first argument is ignored, the second is the offset
  // relative to the beginning of this block's tile
  auto visit = [=]__device__(auto, auto offset) {
    c(first + inc*(tile.begin+offset));
  };

  cuda_strided_iterate<E::nt, E::vt>(visit, thread_id, tile.count());
}
43
44} // end of namespace detail -------------------------------------------------
45
46// ----------------------------------------------------------------------------
47// cudaFlow: for_each, for_each_index
48// ----------------------------------------------------------------------------
49
50// Function: for_each
// Creates a kernel task that runs detail::cuda_for_each_kernel over the
// range [first, last) with callable c, and returns the result of kernel(...).
51template <typename Creator, typename Deleter>
52template <typename I, typename C, typename E>
// NOTE(review): the declarator line (for_each.hpp:53) is missing from this
// listing. The index at the end of the page gives it as
// `cudaTask for_each(I first, I last, C callable)`; the owning class is not
// visible here - restore the line from the upstream header before compiling.
54
// number of elements in the range; the std::distance result is narrowed to
// unsigned, which is the count type the kernel expects
55 unsigned count = std::distance(first, last);
56
// launch arguments: E::num_blocks(count) as the grid, E::nt as the block,
// and 0 as the third argument (presumably dynamic shared-memory bytes -
// confirm against the kernel(...) declaration)
57 return kernel(
58 E::num_blocks(count), E::nt, 0,
59 detail::cuda_for_each_kernel<I, C, E>, first, count, c
60 );
61}
62
63// Function: for_each
// Updates the parameters of an existing for_each kernel task: re-issues
// kernel(task, ...) with detail::cuda_for_each_kernel and the new range/callable.
64template <typename Creator, typename Deleter>
65template <typename I, typename C, typename E>
// NOTE(review): the declarator line (for_each.hpp:66) is missing from this
// listing. The index at the end of the page gives it as
// `void for_each(cudaTask task, I first, I last, C callable)`; presumably a
// member of cudaGraphExecBase<Creator, Deleter> like the for_each_index
// update overload in this file - confirm against the upstream header.
67
// number of elements in the range; the std::distance result is narrowed to
// unsigned, which is the count type the kernel expects
68 unsigned count = std::distance(first, last);
69
// launch arguments: E::num_blocks(count) as the grid, E::nt as the block,
// and 0 as the shared-memory size, followed by the kernel and its arguments
70 kernel(task,
71 E::num_blocks(count), E::nt, 0,
72 detail::cuda_for_each_kernel<I, C, E>, first, count, c
73 );
74}
75
76// Function: for_each_index
// Creates a kernel task that runs detail::cuda_for_each_index_kernel over the
// index range described by first, last and inc with callable c, and returns
// the result of kernel(...).
77template <typename Creator, typename Deleter>
78template <typename I, typename C, typename E>
// NOTE(review): the declarator line (for_each.hpp:79) is missing from this
// listing. The index at the end of the page gives it as
// `cudaTask for_each_index(I first, I last, I step, C callable)`; the body
// below refers to the step as `inc` and the callable as `c`. The owning class
// is not visible here - restore the line from the upstream header.
80
// number of iterations in the index range; distance(...) is listed as
// returning size_t, so this narrows to the kernel's unsigned count type
81 unsigned count = distance(first, last, inc);
82
// launch arguments: E::num_blocks(count) as the grid, E::nt as the block,
// and 0 as the third argument (presumably dynamic shared-memory bytes -
// confirm against the kernel(...) declaration)
83 return kernel(
84 E::num_blocks(count), E::nt, 0,
85 detail::cuda_for_each_index_kernel<I, C, E>, first, inc, count, c
86 );
87}
88
89// Function: for_each_index
/**
 * @brief updates parameters of a for_each_index kernel task
 *
 * Recomputes the iteration count of the index range and forwards the new
 * launch configuration and kernel arguments to kernel(task, ...).
 *
 * @tparam I index type
 * @tparam C callable type
 * @tparam E execution policy supplying E::num_blocks and E::nt
 *
 * @param task handle of the kernel task to update
 * @param first starting index
 * @param last index bound of the range
 * @param inc step between two consecutive indices
 * @param c callable applied to each generated index
 */
template <typename Creator, typename Deleter>
template <typename I, typename C, typename E>
void cudaGraphExecBase<Creator, Deleter>::for_each_index(cudaTask task, I first, I last, I inc, C c) {

  // distance(...) yields a size_t; the kernel takes its count as unsigned
  unsigned num_iterations = static_cast<unsigned>(distance(first, last, inc));

  auto kernel_fn = detail::cuda_for_each_index_kernel<I, C, E>;

  // grid = E::num_blocks(num_iterations), block = E::nt, shared memory = 0
  kernel(
    task,
    E::num_blocks(num_iterations), E::nt, 0,
    kernel_fn, first, inc, num_iterations, c
  );
}
101
102
103} // end of namespace tf -----------------------------------------------------
104
105
106
107
108
109
cudaTask for_each(I first, I last, C callable)
applies a callable to each dereferenced element of the data array
Definition for_each.hpp:53
cudaTask kernel(dim3 g, dim3 b, size_t s, F f, ArgsT... args)
creates a kernel task
Definition cuda_graph.hpp:1010
cudaTask for_each_index(I first, I last, I step, C callable)
applies a callable to each index in the range with the step size
Definition for_each.hpp:79
void kernel(cudaTask task, dim3 g, dim3 b, size_t shm, F f, ArgsT... args)
updates parameters of a kernel task
Definition cuda_graph_exec.hpp:279
void for_each_index(cudaTask task, I first, I last, I step, C callable)
updates parameters of a for_each_index kernel task created from the CUDA graph of *this
Definition for_each.hpp:92
void for_each(cudaTask task, I first, I last, C callable)
updates parameters of a for_each kernel task created from the CUDA graph of *this
Definition for_each.hpp:66
class to create a task handle of a CUDA Graph node
Definition cuda_graph.hpp:315
taskflow namespace
Definition small_vector.hpp:20
constexpr size_t distance(B beg, E end, S step)
calculates the number of iterations in the given index range
Definition iterator.hpp:73