class to create an executable CUDA graph with unique ownership More...
#include <taskflow/cuda/cuda_graph_exec.hpp>
Public Types | |
| using | base_type = std::unique_ptr<std::remove_pointer_t<cudaGraphExec_t>, Deleter> |
| base std::unique_ptr type | |
Public Member Functions | |
| template<typename... ArgsT> | |
| cudaGraphExecBase (ArgsT &&... args) | |
constructs a cudaGraphExec object by passing the given arguments to the executable CUDA graph creator | |
| cudaGraphExecBase (cudaGraphExecBase &&)=default | |
constructs a cudaGraphExec from the given rhs using move semantics | |
| cudaGraphExecBase & | operator= (cudaGraphExecBase &&)=default |
assigns the rhs to *this using move semantics | |
| template<typename C> | |
| void | host (cudaTask task, C &&callable, void *user_data) |
| updates parameters of a host task | |
| template<typename F, typename... ArgsT> | |
| void | kernel (cudaTask task, dim3 g, dim3 b, size_t shm, F f, ArgsT... args) |
| updates parameters of a kernel task | |
| void | memset (cudaTask task, void *dst, int ch, size_t count) |
| updates parameters of a memset task | |
| void | memcpy (cudaTask task, void *tgt, const void *src, size_t bytes) |
| updates parameters of a memcpy task | |
| template<typename T, std::enable_if_t< is_pod_v< T > &&(sizeof(T)==1||sizeof(T)==2||sizeof(T)==4), void > * = nullptr> | |
| void | zero (cudaTask task, T *dst, size_t count) |
| updates parameters of a memset task to a zero task | |
| template<typename T, std::enable_if_t< is_pod_v< T > &&(sizeof(T)==1||sizeof(T)==2||sizeof(T)==4), void > * = nullptr> | |
| void | fill (cudaTask task, T *dst, T value, size_t count) |
| updates parameters of a memset task to a fill task | |
| template<typename T, std::enable_if_t<!std::is_same_v< T, void >, void > * = nullptr> | |
| void | copy (cudaTask task, T *tgt, const T *src, size_t num) |
| updates parameters of a memcpy task to a copy task | |
| template<typename C> | |
| void | single_task (cudaTask task, C c) |
| updates a single-threaded kernel task | |
| template<typename I, typename C, typename E = cudaDefaultExecutionPolicy> | |
| void | for_each (cudaTask task, I first, I last, C callable) |
updates parameters of a for_each kernel task created from the CUDA graph of *this | |
| template<typename I, typename C, typename E = cudaDefaultExecutionPolicy> | |
| void | for_each_index (cudaTask task, I first, I last, I step, C callable) |
updates parameters of a for_each_index kernel task created from the CUDA graph of *this | |
| template<typename I, typename O, typename C, typename E = cudaDefaultExecutionPolicy> | |
| void | transform (cudaTask task, I first, I last, O output, C c) |
updates parameters of a transform kernel task created from the CUDA graph of *this | |
| template<typename I1, typename I2, typename O, typename C, typename E = cudaDefaultExecutionPolicy> | |
| void | transform (cudaTask task, I1 first1, I1 last1, I2 first2, O output, C c) |
updates parameters of a transform kernel task created from the CUDA graph of *this | |
class to create an executable CUDA graph with unique ownership
| Creator | functor to create the executable CUDA graph (used in constructor) |
| Deleter | functor to delete the executable CUDA graph (used in destructor) |
This class wraps a cudaGraphExec_t handle with std::unique_ptr to ensure proper resource management and automatic cleanup.
|
inline explicit |
constructs a cudaGraphExec object by passing the given arguments to the executable CUDA graph creator
Constructs a cudaGraphExec object by passing the given arguments to the executable CUDA graph creator
| args | arguments to pass to the executable CUDA graph creator |
| void tf::cudaGraphExecBase< Creator, Deleter >::copy | ( | cudaTask | task, |
| T * | tgt, | ||
| const T * | src, | ||
| size_t | num ) |
updates parameters of a memcpy task to a copy task
The method is similar to tf::cudaFlow::copy but operates on a task of type tf::cudaTaskType::MEMCPY. The source/destination memory may have different address values but must be allocated from the same contexts as the original source/destination memory.
| void tf::cudaGraphExecBase< Creator, Deleter >::fill | ( | cudaTask | task, |
| T * | dst, | ||
| T | value, | ||
| size_t | count ) |
updates parameters of a memset task to a fill task
The method is similar to tf::cudaFlow::fill but operates on a task of type tf::cudaTaskType::MEMSET.
The destination memory may have a different address value but must be allocated from the same contexts as the original destination memory.
| void tf::cudaGraphExecBase< Creator, Deleter >::host | ( | cudaTask | task, |
| C && | callable, | ||
| void * | user_data ) |
updates parameters of a host task
This method updates the parameters of the given host task (similar to tf::cudaFlow::host).
| void tf::cudaGraphExecBase< Creator, Deleter >::kernel | ( | cudaTask | task, |
| dim3 | g, | ||
| dim3 | b, | ||
| size_t | shm, | ||
| F | f, | ||
| ArgsT... | args ) |
updates parameters of a kernel task
The method is similar to tf::cudaFlow::kernel but operates on a task of type tf::cudaTaskType::KERNEL. The kernel function name must NOT change.
| void tf::cudaGraphExecBase< Creator, Deleter >::memcpy | ( | cudaTask | task, |
| void * | tgt, | ||
| const void * | src, | ||
| size_t | bytes ) |
updates parameters of a memcpy task
The method is similar to tf::cudaFlow::memcpy but operates on a task of type tf::cudaTaskType::MEMCPY. The source/destination memory may have different address values but must be allocated from the same contexts as the original source/destination memory.
| void tf::cudaGraphExecBase< Creator, Deleter >::memset | ( | cudaTask | task, |
| void * | dst, | ||
| int | ch, | ||
| size_t | count ) |
updates parameters of a memset task
The method is similar to tf::cudaFlow::memset but operates on a task of type tf::cudaTaskType::MEMSET. The destination memory may have a different address value but must be allocated from the same contexts as the original destination memory.
| void tf::cudaGraphExecBase< Creator, Deleter >::single_task | ( | cudaTask | task, |
| C | c ) |
updates a single-threaded kernel task
This method is similar to tf::cudaFlow::single_task but operates on an existing task.
| void tf::cudaGraphExecBase< Creator, Deleter >::zero | ( | cudaTask | task, |
| T * | dst, | ||
| size_t | count ) |
updates parameters of a memset task to a zero task
The method is similar to tf::cudaFlow::zero but operates on a task of type tf::cudaTaskType::MEMSET.
The destination memory may have a different address value but must be allocated from the same contexts as the original destination memory.