template<typename Creator, typename Deleter>
tf::cudaGraphExecBase class

class to create an executable CUDA graph with unique ownership

Template parameters
Creator functor to create the executable CUDA graph (used in constructor)
Deleter functor to delete the executable CUDA graph (used in destructor)

This class wraps a cudaGraphExec_t handle with std::unique_ptr to ensure proper resource management and automatic cleanup.

Public types

using base_type = std::unique_ptr<std::remove_pointer_t<cudaGraphExec_t>, Deleter>
base std::unique_ptr type

Constructors, destructors, conversion operators

template<typename... ArgsT>
cudaGraphExecBase(ArgsT && ... args) explicit
constructs a cudaGraphExec object by passing the given arguments to the executable CUDA graph creator
cudaGraphExecBase(cudaGraphExecBase&&) defaulted
constructs a cudaGraphExec from the given rhs using move semantics

Public functions

auto operator=(cudaGraphExecBase&&) -> cudaGraphExecBase& defaulted
assigns the rhs to *this using move semantics
template<typename C>
void host(cudaTask task, C&& callable, void* user_data)
updates parameters of a host task
template<typename F, typename... ArgsT>
void kernel(cudaTask task, dim3 g, dim3 b, size_t shm, F f, ArgsT... args)
updates parameters of a kernel task
void memset(cudaTask task, void* dst, int ch, size_t count)
updates parameters of a memset task
void memcpy(cudaTask task, void* tgt, const void* src, size_t bytes)
updates parameters of a memcpy task
template<typename T, std::enable_if_t<is_pod_v<T> && (sizeof(T)==1||sizeof(T)==2||sizeof(T)==4), void>* = nullptr>
void zero(cudaTask task, T* dst, size_t count)
updates parameters of a memset task to a zero task
template<typename T, std::enable_if_t<is_pod_v<T> && (sizeof(T)==1||sizeof(T)==2||sizeof(T)==4), void>* = nullptr>
void fill(cudaTask task, T* dst, T value, size_t count)
updates parameters of a memset task to a fill task
template<typename T, std::enable_if_t<!std::is_same_v<T, void>, void>* = nullptr>
void copy(cudaTask task, T* tgt, const T* src, size_t num)
updates parameters of a memcpy task to a copy task
template<typename C>
void single_task(cudaTask task, C c)
updates a single-threaded kernel task
template<typename I, typename C, typename E = cudaDefaultExecutionPolicy>
void for_each(cudaTask task, I first, I last, C callable)
updates parameters of a for_each kernel task created from the CUDA graph of *this
template<typename I, typename C, typename E = cudaDefaultExecutionPolicy>
void for_each_index(cudaTask task, I first, I last, I step, C callable)
updates parameters of a for_each_index kernel task created from the CUDA graph of *this
template<typename I, typename O, typename C, typename E = cudaDefaultExecutionPolicy>
void transform(cudaTask task, I first, I last, O output, C c)
updates parameters of a transform kernel task created from the CUDA graph of *this
template<typename I1, typename I2, typename O, typename C, typename E = cudaDefaultExecutionPolicy>
void transform(cudaTask task, I1 first1, I1 last1, I2 first2, O output, C c)
updates parameters of a transform kernel task created from the CUDA graph of *this

Function documentation

template<typename Creator, typename Deleter> template<typename... ArgsT>
tf::cudaGraphExecBase<Creator, Deleter>::cudaGraphExecBase(ArgsT && ... args) explicit

constructs a cudaGraphExec object by passing the given arguments to the executable CUDA graph creator

Parameters
args arguments to pass to the executable CUDA graph creator

Constructs a cudaGraphExec object by passing the given arguments to the executable CUDA graph creator

template<typename Creator, typename Deleter> template<typename C>
void tf::cudaGraphExecBase<Creator, Deleter>::host(cudaTask task, C&& callable, void* user_data)

updates parameters of a host task

This method updates the parameters of the given host task (similar to tf::cudaFlow::host).

template<typename Creator, typename Deleter> template<typename F, typename... ArgsT>
void tf::cudaGraphExecBase<Creator, Deleter>::kernel(cudaTask task, dim3 g, dim3 b, size_t shm, F f, ArgsT... args)

updates parameters of a kernel task

The method is similar to tf::cudaFlow::kernel but operates on a task of type tf::cudaTaskType::KERNEL. The kernel function name must NOT change.

template<typename Creator, typename Deleter>
void tf::cudaGraphExecBase<Creator, Deleter>::memset(cudaTask task, void* dst, int ch, size_t count)

updates parameters of a memset task

The method is similar to tf::cudaFlow::memset but operates on a task of type tf::cudaTaskType::MEMSET. The destination memory may have a different address value but must be allocated from the same context as the original destination memory.

template<typename Creator, typename Deleter>
void tf::cudaGraphExecBase<Creator, Deleter>::memcpy(cudaTask task, void* tgt, const void* src, size_t bytes)

updates parameters of a memcpy task

The method is similar to tf::cudaFlow::memcpy but operates on a task of type tf::cudaTaskType::MEMCPY. The source/destination memory may have different address values but must be allocated from the same contexts as the original source/destination memory.

template<typename Creator, typename Deleter> template<typename T, std::enable_if_t<is_pod_v<T> && (sizeof(T)==1||sizeof(T)==2||sizeof(T)==4), void>* = nullptr>
void tf::cudaGraphExecBase<Creator, Deleter>::zero(cudaTask task, T* dst, size_t count)

updates parameters of a memset task to a zero task

The method is similar to tf::cudaFlow::zero but operates on a task of type tf::cudaTaskType::MEMSET.

The destination memory may have a different address value but must be allocated from the same context as the original destination memory.

template<typename Creator, typename Deleter> template<typename T, std::enable_if_t<is_pod_v<T> && (sizeof(T)==1||sizeof(T)==2||sizeof(T)==4), void>* = nullptr>
void tf::cudaGraphExecBase<Creator, Deleter>::fill(cudaTask task, T* dst, T value, size_t count)

updates parameters of a memset task to a fill task

The method is similar to tf::cudaFlow::fill but operates on a task of type tf::cudaTaskType::MEMSET.

The destination memory may have a different address value but must be allocated from the same context as the original destination memory.

template<typename Creator, typename Deleter> template<typename T, std::enable_if_t<!std::is_same_v<T, void>, void>* = nullptr>
void tf::cudaGraphExecBase<Creator, Deleter>::copy(cudaTask task, T* tgt, const T* src, size_t num)

updates parameters of a memcpy task to a copy task

The method is similar to tf::cudaFlow::copy but operates on a task of type tf::cudaTaskType::MEMCPY. The source/destination memory may have different address values but must be allocated from the same contexts as the original source/destination memory.

template<typename Creator, typename Deleter> template<typename C>
void tf::cudaGraphExecBase<Creator, Deleter>::single_task(cudaTask task, C c)

updates a single-threaded kernel task

This method is similar to tf::cudaFlow::single_task but operates on an existing task.