template<typename Creator, typename Deleter>
cudaGraphExecBase class
class to create an executable CUDA graph managed by a C++ smart pointer
Template parameters | |
---|---|
Creator | functor to create the executable CUDA graph (used in constructor) |
Deleter | functor to delete the executable CUDA graph (used in destructor) |
This class wraps a cudaGraphExec_t handle with std::unique_ptr to ensure proper resource management and automatic cleanup.
Public types
-
using base_type = std::unique_ptr<std::remove_pointer_t<cudaGraphExec_t>, Deleter>
- base std::unique_ptr type
Constructors, destructors, conversion operators
-
template<typename... ArgsT>cudaGraphExecBase(ArgsT && ... args) explicit
- constructs a cudaGraphExec object by passing the given arguments to the executable CUDA graph creator
- cudaGraphExecBase(cudaGraphExecBase&&) defaulted
- constructs a cudaGraphExec from the given rhs using move semantics
Public functions
- auto operator=(cudaGraphExecBase&&) -> cudaGraphExecBase& defaulted
- assigns the rhs to *this using move semantics
-
template<typename C>void host(cudaTask task, C&& callable, void* user_data)
- updates parameters of a host task
-
template<typename F, typename... ArgsT>void kernel(cudaTask task, dim3 g, dim3 b, size_t shm, F f, ArgsT... args)
- updates parameters of a kernel task
- void memset(cudaTask task, void* dst, int ch, size_t count)
- updates parameters of a memset task
- void memcpy(cudaTask task, void* tgt, const void* src, size_t bytes)
- updates parameters of a memcpy task
-
template<typename T, std::enable_if_t<is_pod_v<T> && (sizeof(T)==1||sizeof(T)==2||sizeof(T)==4), void>* = nullptr>void zero(cudaTask task, T* dst, size_t count)
- updates parameters of a memset task to a zero task
-
template<typename T, std::enable_if_t<is_pod_v<T> && (sizeof(T)==1||sizeof(T)==2||sizeof(T)==4), void>* = nullptr>void fill(cudaTask task, T* dst, T value, size_t count)
- updates parameters of a memset task to a fill task
-
template<typename T, std::enable_if_t<!std::is_same_v<T, void>, void>* = nullptr>void copy(cudaTask task, T* tgt, const T* src, size_t num)
- updates parameters of a memcpy task to a copy task
-
template<typename C>void single_task(cudaTask task, C c)
- updates a single-threaded kernel task
-
template<typename I, typename C, typename E = cudaDefaultExecutionPolicy>void for_each(cudaTask task, I first, I last, C callable)
- updates parameters of a for_each kernel task created from the CUDA graph of *this
-
template<typename I, typename C, typename E = cudaDefaultExecutionPolicy>void for_each_index(cudaTask task, I first, I last, I step, C callable)
- updates parameters of a for_each_index kernel task created from the CUDA graph of *this
-
template<typename I, typename O, typename C, typename E = cudaDefaultExecutionPolicy>void transform(cudaTask task, I first, I last, O output, C c)
- updates parameters of a transform kernel task created from the CUDA graph of *this
-
template<typename I1, typename I2, typename O, typename C, typename E = cudaDefaultExecutionPolicy>void transform(cudaTask task, I1 first1, I1 last1, I2 first2, O output, C c)
- updates parameters of a transform kernel task created from the CUDA graph of *this
Function documentation
template<typename Creator, typename Deleter>
template<typename... ArgsT>
tf:: cudaGraphExecBase<Creator, Deleter>:: cudaGraphExecBase(ArgsT && ... args) explicit
constructs a cudaGraphExec
object by passing the given arguments to the executable CUDA graph creator
Parameters | |
---|---|
args | arguments to pass to the executable CUDA graph creator |
Constructs a cudaGraphExec
object by passing the given arguments to the executable CUDA graph creator
template<typename Creator, typename Deleter>
template<typename F, typename... ArgsT>
void tf:: cudaGraphExecBase<Creator, Deleter>:: kernel(cudaTask task,
dim3 g,
dim3 b,
size_t shm,
F f,
ArgsT... args)
updates parameters of a kernel task
The method is similar to tf::cudaFlow::kernel but operates on a task of type tf::cudaTaskType::KERNEL. The kernel function name must NOT change.
template<typename Creator, typename Deleter>
void tf:: cudaGraphExecBase<Creator, Deleter>:: memset(cudaTask task,
void* dst,
int ch,
size_t count)
updates parameters of a memset task
The method is similar to tf::cudaFlow::memset but operates on a task of type tf::cudaTaskType::MEMSET. The destination memory may have a different address value but must be allocated from the same context as the original destination memory.
template<typename Creator, typename Deleter>
void tf:: cudaGraphExecBase<Creator, Deleter>:: memcpy(cudaTask task,
void* tgt,
const void* src,
size_t bytes)
updates parameters of a memcpy task
The method is similar to tf::cudaFlow::memcpy but operates on a task of type tf::cudaTaskType::MEMCPY. The source/destination memory may have different address values but must be allocated from the same contexts as the original source/destination memory.
template<typename Creator, typename Deleter>
template<typename T, std::enable_if_t<is_pod_v<T> && (sizeof(T)==1||sizeof(T)==2||sizeof(T)==4), void>* = nullptr>
void tf:: cudaGraphExecBase<Creator, Deleter>:: zero(cudaTask task,
T* dst,
size_t count)
updates parameters of a memset task to a zero task
The method is similar to tf::cudaFlow::zero but operates on a task of type tf::cudaTaskType::MEMSET.
The destination memory may have a different address value but must be allocated from the same context as the original destination memory.
template<typename Creator, typename Deleter>
template<typename T, std::enable_if_t<is_pod_v<T> && (sizeof(T)==1||sizeof(T)==2||sizeof(T)==4), void>* = nullptr>
void tf:: cudaGraphExecBase<Creator, Deleter>:: fill(cudaTask task,
T* dst,
T value,
size_t count)
updates parameters of a memset task to a fill task
The method is similar to tf::cudaFlow::fill but operates on a task of type tf::cudaTaskType::MEMSET.
The destination memory may have a different address value but must be allocated from the same context as the original destination memory.
template<typename Creator, typename Deleter>
template<typename T, std::enable_if_t<!std::is_same_v<T, void>, void>* = nullptr>
void tf:: cudaGraphExecBase<Creator, Deleter>:: copy(cudaTask task,
T* tgt,
const T* src,
size_t num)
updates parameters of a memcpy task to a copy task
The method is similar to tf::cudaFlow::copy but operates on a task of type tf::cudaTaskType::MEMCPY. The source/destination memory may have different address values but must be allocated from the same contexts as the original source/destination memory.
template<typename Creator, typename Deleter>
template<typename C>
void tf:: cudaGraphExecBase<Creator, Deleter>:: single_task(cudaTask task,
C c)
updates a single-threaded kernel task
The method is similar to tf::cudaFlow::single_task but operates on an existing kernel task.