template<typename Creator, typename Deleter>
tf::cudaGraphExecBase class

class to create an executable CUDA graph managed by a C++ smart pointer

Template parameters
Creator functor to create the executable CUDA graph (used in constructor)
Deleter functor to delete the executable CUDA graph (used in destructor)

This class wraps a cudaGraphExec_t handle with std::unique_ptr to ensure proper resource management and automatic cleanup.

Public types

using base_type = std::unique_ptr<std::remove_pointer_t<cudaGraphExec_t>, Deleter>
base std::unique_ptr type

Constructors, destructors, conversion operators

template<typename... ArgsT>
cudaGraphExecBase(ArgsT && ... args) explicit
constructs a cudaGraphExec object by passing the given arguments to the executable CUDA graph creator
operator cudaGraphExec_t() const noexcept
implicit conversion to the underlying cudaGraphExec_t object

Public functions

void run(cudaStream_t stream)
runs the executable graph via the given CUDA stream
template<typename C>
void host(cudaTask task, C&& callable, void* user_data)
updates parameters of a host task
template<typename F, typename... ArgsT>
void kernel(cudaTask task, dim3 g, dim3 b, size_t shm, F f, ArgsT... args)
updates parameters of a kernel task
void memset(cudaTask task, void* dst, int ch, size_t count)
updates parameters of a memset task
void memcpy(cudaTask task, void* tgt, const void* src, size_t bytes)
updates parameters of a memcpy task
template<typename T, std::enable_if_t<is_pod_v<T> && (sizeof(T)==1||sizeof(T)==2||sizeof(T)==4), void>* = nullptr>
void zero(cudaTask task, T* dst, size_t count)
updates parameters of a memset task to a zero task
template<typename T, std::enable_if_t<is_pod_v<T> && (sizeof(T)==1||sizeof(T)==2||sizeof(T)==4), void>* = nullptr>
void fill(cudaTask task, T* dst, T value, size_t count)
updates parameters of a memset task to a fill task
template<typename T, std::enable_if_t<!std::is_same_v<T, void>, void>* = nullptr>
void copy(cudaTask task, T* tgt, const T* src, size_t num)
updates parameters of a memcpy task to a copy task

Function documentation

template<typename Creator, typename Deleter> template<typename... ArgsT>
tf::cudaGraphExecBase<Creator, Deleter>::cudaGraphExecBase(ArgsT && ... args) explicit

constructs a cudaGraphExec object by passing the given arguments to the executable CUDA graph creator

Parameters
args arguments to pass to the executable CUDA graph creator

Constructs a cudaGraphExec object by passing the given arguments to the executable CUDA graph creator.

template<typename Creator, typename Deleter>
tf::cudaGraphExecBase<Creator, Deleter>::operator cudaGraphExec_t() const noexcept

implicit conversion to the underlying cudaGraphExec_t object

Returns the underlying cudaGraphExec_t object; this is equivalent to calling base_type::get().

template<typename Creator, typename Deleter> template<typename C>
void tf::cudaGraphExecBase<Creator, Deleter>::host(cudaTask task, C&& callable, void* user_data)

updates parameters of a host task

This method updates the parameters of the given host task (similar to tf::cudaFlow::host).

template<typename Creator, typename Deleter> template<typename F, typename... ArgsT>
void tf::cudaGraphExecBase<Creator, Deleter>::kernel(cudaTask task, dim3 g, dim3 b, size_t shm, F f, ArgsT... args)

updates parameters of a kernel task

The method is similar to tf::cudaFlow::kernel but operates on a task of type tf::cudaTaskType::KERNEL. The kernel function name must NOT change.

template<typename Creator, typename Deleter>
void tf::cudaGraphExecBase<Creator, Deleter>::memset(cudaTask task, void* dst, int ch, size_t count)

updates parameters of a memset task

The method is similar to tf::cudaFlow::memset but operates on a task of type tf::cudaTaskType::MEMSET. The destination memory may have a different address value but must be allocated from the same context as the original destination memory.

template<typename Creator, typename Deleter>
void tf::cudaGraphExecBase<Creator, Deleter>::memcpy(cudaTask task, void* tgt, const void* src, size_t bytes)

updates parameters of a memcpy task

The method is similar to tf::cudaFlow::memcpy but operates on a task of type tf::cudaTaskType::MEMCPY. The source/destination memory may have different address values but must be allocated from the same contexts as the original source/destination memory.

template<typename Creator, typename Deleter> template<typename T, std::enable_if_t<is_pod_v<T> && (sizeof(T)==1||sizeof(T)==2||sizeof(T)==4), void>* = nullptr>
void tf::cudaGraphExecBase<Creator, Deleter>::zero(cudaTask task, T* dst, size_t count)

updates parameters of a memset task to a zero task

The method is similar to tf::cudaFlow::zero but operates on a task of type tf::cudaTaskType::MEMSET.

The destination memory may have a different address value but must be allocated from the same context as the original destination memory.

template<typename Creator, typename Deleter> template<typename T, std::enable_if_t<is_pod_v<T> && (sizeof(T)==1||sizeof(T)==2||sizeof(T)==4), void>* = nullptr>
void tf::cudaGraphExecBase<Creator, Deleter>::fill(cudaTask task, T* dst, T value, size_t count)

updates parameters of a memset task to a fill task

The method is similar to tf::cudaFlow::fill but operates on a task of type tf::cudaTaskType::MEMSET.

The destination memory may have a different address value but must be allocated from the same context as the original destination memory.

template<typename Creator, typename Deleter> template<typename T, std::enable_if_t<!std::is_same_v<T, void>, void>* = nullptr>
void tf::cudaGraphExecBase<Creator, Deleter>::copy(cudaTask task, T* tgt, const T* src, size_t num)

updates parameters of a memcpy task to a copy task

The method is similar to tf::cudaFlow::copy but operates on a task of type tf::cudaTaskType::MEMCPY. The source/destination memory may have different address values but must be allocated from the same contexts as the original source/destination memory.