template<typename Creator, typename Deleter>
cudaGraphExecBase class
class to create an executable CUDA graph managed by a C++ smart pointer
Template parameters | |
---|---|
Creator | functor to create the executable CUDA graph (used in constructor) |
Deleter | functor to delete the executable CUDA graph (used in destructor) |
This class wraps a cudaGraphExec_t handle with std::unique_ptr to ensure proper resource management and automatic cleanup.
Public types
-
using base_type = std::unique_ptr<std::remove_pointer_t<cudaGraphExec_t>, Deleter>
- base std::unique_ptr type
Constructors, destructors, conversion operators
-
template<typename... ArgsT>cudaGraphExecBase(ArgsT && ... args) explicit
- constructs a cudaGraphExec object by passing the given arguments to the executable CUDA graph creator
- operator cudaGraphExec_t() const noexcept
- implicit conversion to the underlying cudaGraphExec_t object
Public functions
- void run(cudaStream_t stream)
- runs the executable graph via the given CUDA stream
-
template<typename C>void host(cudaTask task, C&& callable, void* user_data)
- updates parameters of a host task
-
template<typename F, typename... ArgsT>void kernel(cudaTask task, dim3 g, dim3 b, size_t shm, F f, ArgsT... args)
- updates parameters of a kernel task
- void memset(cudaTask task, void* dst, int ch, size_t count)
- updates parameters of a memset task
- void memcpy(cudaTask task, void* tgt, const void* src, size_t bytes)
- updates parameters of a memcpy task
-
template<typename T, std::enable_if_t<is_pod_v<T> && (sizeof(T)==1||sizeof(T)==2||sizeof(T)==4), void>* = nullptr>void zero(cudaTask task, T* dst, size_t count)
- updates parameters of a memset task to a zero task
-
template<typename T, std::enable_if_t<is_pod_v<T> && (sizeof(T)==1||sizeof(T)==2||sizeof(T)==4), void>* = nullptr>void fill(cudaTask task, T* dst, T value, size_t count)
- updates parameters of a memset task to a fill task
-
template<typename T, std::enable_if_t<!std::is_same_v<T, void>, void>* = nullptr>void copy(cudaTask task, T* tgt, const T* src, size_t num)
- updates parameters of a memcpy task to a copy task
Function documentation
template<typename Creator, typename Deleter>
template<typename... ArgsT>
tf:: cudaGraphExecBase<Creator, Deleter>:: cudaGraphExecBase(ArgsT && ... args) explicit
constructs a cudaGraphExec
object by passing the given arguments to the executable CUDA graph creator
Parameters | |
---|---|
args | arguments to pass to the executable CUDA graph creator |
Constructs a cudaGraphExec
object by passing the given arguments to the executable CUDA graph creator
template<typename Creator, typename Deleter>
tf:: cudaGraphExecBase<Creator, Deleter>:: operator cudaGraphExec_t() const noexcept
implicit conversion to the underlying cudaGraphExec_t
object
Returns the underlying cudaGraphExec_t object; equivalent to calling base_type::get().
template<typename Creator, typename Deleter>
template<typename C>
void tf:: cudaGraphExecBase<Creator, Deleter>:: host(cudaTask task,
C&& callable,
void* user_data)
updates parameters of a host task
This method updates the parameters of the given host task (similar to the tf:: method that creates a host task, but applied to an existing task of this executable graph).
template<typename Creator, typename Deleter>
template<typename F, typename... ArgsT>
void tf:: cudaGraphExecBase<Creator, Deleter>:: kernel(cudaTask task,
dim3 g,
dim3 b,
size_t shm,
F f,
ArgsT... args)
updates parameters of a kernel task
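The method is similar to the tf:: method that creates a kernel task, but it updates the parameters of an existing kernel task of this executable graph.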
template<typename Creator, typename Deleter>
void tf:: cudaGraphExecBase<Creator, Deleter>:: memset(cudaTask task,
void* dst,
int ch,
size_t count)
updates parameters of a memset task
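The method is similar to the tf:: method that creates a memset task, but it updates the parameters of an existing memset task of this executable graph.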
template<typename Creator, typename Deleter>
void tf:: cudaGraphExecBase<Creator, Deleter>:: memcpy(cudaTask task,
void* tgt,
const void* src,
size_t bytes)
updates parameters of a memcpy task
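The method is similar to the tf:: method that creates a memcpy task, but it updates the parameters of an existing memcpy task of this executable graph.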
template<typename Creator, typename Deleter>
template<typename T, std::enable_if_t<is_pod_v<T> && (sizeof(T)==1||sizeof(T)==2||sizeof(T)==4), void>* = nullptr>
void tf:: cudaGraphExecBase<Creator, Deleter>:: zero(cudaTask task,
T* dst,
size_t count)
updates parameters of a memset task to a zero task
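The method is similar to the tf:: method that creates a zero task, but it updates the parameters of an existing memset task of this executable graph.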
The source/destination memory may have different address values but must be allocated from the same contexts as the original source/destination memory.
template<typename Creator, typename Deleter>
template<typename T, std::enable_if_t<is_pod_v<T> && (sizeof(T)==1||sizeof(T)==2||sizeof(T)==4), void>* = nullptr>
void tf:: cudaGraphExecBase<Creator, Deleter>:: fill(cudaTask task,
T* dst,
T value,
size_t count)
updates parameters of a memset task to a fill task
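The method is similar to the tf:: method that creates a fill task, but it updates the parameters of an existing memset task of this executable graph.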
The source/destination memory may have different address values but must be allocated from the same contexts as the original source/destination memory.
template<typename Creator, typename Deleter>
template<typename T, std::enable_if_t<!std::is_same_v<T, void>, void>* = nullptr>
void tf:: cudaGraphExecBase<Creator, Deleter>:: copy(cudaTask task,
T* tgt,
const T* src,
size_t num)
updates parameters of a memcpy task to a copy task
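The method is similar to the tf:: method that creates a copy task, but it updates the parameters of an existing memcpy task of this executable graph.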