3#include "cuda_graph.hpp"
43 cudaGraphInstantiate(&exec, graph,
nullptr,
nullptr, 0),
44 "failed to create an executable graph"
52 template <
typename C,
typename D>
77 cudaGraphExecDestroy(executable);
92template <
typename Creator,
typename Deleter>
93class cudaGraphExecBase :
public std::unique_ptr<std::remove_pointer_t<cudaGraphExec_t>, Deleter> {
95 static_assert(std::is_pointer_v<cudaGraphExec_t>,
"cudaGraphExec_t is not a pointer type");
102 using base_type = std::unique_ptr<std::remove_pointer_t<cudaGraphExec_t>, Deleter>;
111 template <
typename... ArgsT>
113 Creator{}(std::forward<ArgsT>(args)...), Deleter()
135 template <
typename C>
145 template <
typename F,
typename... ArgsT>
147 cudaTask task, dim3 g, dim3 b,
size_t shm, F f, ArgsT... args
182 template <
typename T, std::enable_if_t<
183 is_pod_v<T> && (
sizeof(T)==1 ||
sizeof(T)==2 ||
sizeof(T)==4),
void>* =
nullptr
197 template <
typename T, std::enable_if_t<
198 is_pod_v<T> && (
sizeof(T)==1 ||
sizeof(T)==2 ||
sizeof(T)==4),
void>* =
nullptr
211 template <
typename T,
212 std::enable_if_t<!std::is_same_v<T, void>,
void>* =
nullptr
226 template <
typename C>
232 template <
typename I,
typename C,
typename E = cudaDefaultExecutionPolicy>
238 template <
typename I,
typename C,
typename E = cudaDefaultExecutionPolicy>
244 template <
typename I,
typename O,
typename C,
typename E = cudaDefaultExecutionPolicy>
250 template <
typename I1,
typename I2,
typename O,
typename C,
typename E = cudaDefaultExecutionPolicy>
266template <
typename Creator,
typename Deleter>
269 cudaHostNodeParams p {func, user_data};
271 cudaGraphExecHostNodeSetParams(this->get(), task._native_node, &p),
272 "failed to update kernel parameters on ", task
277template <
typename Creator,
typename Deleter>
278template <
typename F,
typename... ArgsT>
280 cudaTask task, dim3 g, dim3 b,
size_t s, F f, ArgsT... args
282 cudaKernelNodeParams p;
284 void* arguments[
sizeof...(ArgsT)] = { (
void*)(&args)... };
288 p.sharedMemBytes = s;
289 p.kernelParams = arguments;
293 cudaGraphExecKernelNodeSetParams(this->get(), task._native_node, &p),
294 "failed to update kernel parameters on ", task
299template <
typename Creator,
typename Deleter>
300template <
typename T, std::enable_if_t<!std::is_same_v<T,
void>,
void>*>
304 cudaGraphExecMemcpyNodeSetParams(this->get(), task._native_node, &p),
305 "failed to update memcpy parameters on ", task
310template <
typename Creator,
typename Deleter>
312 cudaTask task,
void* tgt,
const void* src,
size_t bytes
317 cudaGraphExecMemcpyNodeSetParams(this->get(), task._native_node, &p),
318 "failed to update memcpy parameters on ", task
323template <
typename Creator,
typename Deleter>
327 cudaGraphExecMemsetNodeSetParams(this->get(), task._native_node, &p),
328 "failed to update memset parameters on ", task
333template <
typename Creator,
typename Deleter>
334template <
typename T, std::enable_if_t<
335 is_pod_v<T> && (
sizeof(T)==1 ||
sizeof(T)==2 ||
sizeof(T)==4),
void>*
340 cudaGraphExecMemsetNodeSetParams(this->get(), task._native_node, &p),
341 "failed to update memset parameters on ", task
346template <
typename Creator,
typename Deleter>
347template <
typename T, std::enable_if_t<
348 is_pod_v<T> && (
sizeof(T)==1 ||
sizeof(T)==2 ||
sizeof(T)==4),
void>*
353 cudaGraphExecMemsetNodeSetParams(this->get(), task._native_node, &p),
354 "failed to update memset parameters on ", task
365template <
typename SC,
typename SD>
368 cudaGraphLaunch(exec, this->get()),
"failed to launch a CUDA executable graph"
376template <
typename SC,
typename SD>
377template <
typename EC,
typename ED>
379 return run(exec.get());
class to create a CUDA graph with unique ownership
Definition cuda_graph.hpp:531
class to create an executable CUDA graph with unique ownership
Definition cuda_graph_exec.hpp:93
void zero(cudaTask task, T *dst, size_t count)
updates parameters of a memset task to a zero task
Definition cuda_graph_exec.hpp:350
cudaGraphExecBase(ArgsT &&... args)
constructs a cudaGraphExec object by passing the given arguments to the executable CUDA graph creator
Definition cuda_graph_exec.hpp:112
void transform(cudaTask task, I1 first1, I1 last1, I2 first2, O output, C c)
updates parameters of a transform kernel task created from the CUDA graph of *this
Definition transform.hpp:108
cudaGraphExecBase & operator=(cudaGraphExecBase &&)=default
assign the rhs to *this using move semantics
cudaGraphExecBase(cudaGraphExecBase &&)=default
constructs a cudaGraphExec from the given rhs using move semantics
void kernel(cudaTask task, dim3 g, dim3 b, size_t shm, F f, ArgsT... args)
updates parameters of a kernel task
Definition cuda_graph_exec.hpp:279
void for_each_index(cudaTask task, I first, I last, I step, C callable)
updates parameters of a for_each_index kernel task created from the CUDA graph of *this
Definition for_each.hpp:92
std::unique_ptr< std::remove_pointer_t< cudaGraphExec_t >, Deleter > base_type
base std::unique_ptr type
Definition cuda_graph_exec.hpp:102
void host(cudaTask task, C &&callable, void *user_data)
updates parameters of a host task
Definition cuda_graph_exec.hpp:268
void transform(cudaTask task, I first, I last, O output, C c)
updates parameters of a transform kernel task created from the CUDA graph of *this
Definition transform.hpp:94
void memset(cudaTask task, void *dst, int ch, size_t count)
updates parameters of a memset task
Definition cuda_graph_exec.hpp:324
void single_task(cudaTask task, C c)
updates a single-threaded kernel task
void memcpy(cudaTask task, void *tgt, const void *src, size_t bytes)
updates parameters of a memcpy task
Definition cuda_graph_exec.hpp:311
void copy(cudaTask task, T *tgt, const T *src, size_t num)
updates parameters of a memcpy task to a copy task
Definition cuda_graph_exec.hpp:301
void for_each(cudaTask task, I first, I last, C callable)
updates parameters of a for_each kernel task created from the CUDA graph of *this
Definition for_each.hpp:66
void fill(cudaTask task, T *dst, T value, size_t count)
updates parameters of a memset task to a fill task
Definition cuda_graph_exec.hpp:337
class to create functors for constructing executable CUDA graphs
Definition cuda_graph_exec.hpp:19
cudaGraphExec_t operator()() const
returns a null executable CUDA graph
Definition cuda_graph_exec.hpp:26
class to create a functor for deleting an executable CUDA graph
Definition cuda_graph_exec.hpp:65
void operator()(cudaGraphExec_t executable) const
deletes an executable CUDA graph
Definition cuda_graph_exec.hpp:76
class to create a CUDA stream with unique ownership
Definition cuda_stream.hpp:189
cudaStreamBase(ArgsT &&... args)
constructs a cudaStream object by passing the given arguments to the stream creator
Definition cuda_stream.hpp:211
cudaStreamBase & run(const cudaGraphExecBase< C, D > &exec)
runs the given executable CUDA graph
class to create a task handle of a CUDA Graph node
Definition cuda_graph.hpp:315
taskflow namespace
Definition small_vector.hpp:20
cudaMemsetParams cuda_get_zero_parms(T *dst, size_t count)
gets the memset node parameter of a zero task (typed)
Definition cuda_graph.hpp:114
cudaMemcpy3DParms cuda_get_memcpy_parms(void *tgt, const void *src, size_t bytes)
gets the memcpy node parameter of a memcpy task (untyped)
Definition cuda_graph.hpp:44
cudaMemsetParams cuda_get_memset_parms(void *dst, int ch, size_t count)
gets the memset node parameter of a memset task (untyped)
Definition cuda_graph.hpp:69
cudaMemsetParams cuda_get_fill_parms(T *dst, T value, size_t count)
gets the memset node parameter of a fill task (typed)
Definition cuda_graph.hpp:90
cudaMemcpy3DParms cuda_get_copy_parms(T *tgt, const T *src, size_t num)
gets the memcpy node parameter of a copy task
Definition cuda_graph.hpp:23