Loading...
Searching...
No Matches
tf::cudaGraphExecBase< Creator, Deleter > Class Template Reference

class to create an executable CUDA graph with unique ownership More...

#include <taskflow/cuda/cuda_graph_exec.hpp>

Inheritance diagram for tf::cudaGraphExecBase< Creator, Deleter >:
[legend]
Collaboration diagram for tf::cudaGraphExecBase< Creator, Deleter >:
[legend]

Public Types

using base_type = std::unique_ptr<std::remove_pointer_t<cudaGraphExec_t>, Deleter>
 base std::unique_ptr type
 

Public Member Functions

template<typename... ArgsT>
 cudaGraphExecBase (ArgsT &&... args)
 constructs a cudaGraphExec object by passing the given arguments to the executable CUDA graph creator
 
 cudaGraphExecBase (cudaGraphExecBase &&)=default
 constructs a cudaGraphExec from the given rhs using move semantics
 
cudaGraphExecBase & operator= (cudaGraphExecBase &&)=default
 assign the rhs to *this using move semantics
 
template<typename C>
void host (cudaTask task, C &&callable, void *user_data)
 updates parameters of a host task
 
template<typename F, typename... ArgsT>
void kernel (cudaTask task, dim3 g, dim3 b, size_t shm, F f, ArgsT... args)
 updates parameters of a kernel task
 
void memset (cudaTask task, void *dst, int ch, size_t count)
 updates parameters of a memset task
 
void memcpy (cudaTask task, void *tgt, const void *src, size_t bytes)
 updates parameters of a memcpy task
 
template<typename T, std::enable_if_t< is_pod_v< T > &&(sizeof(T)==1||sizeof(T)==2||sizeof(T)==4), void > * = nullptr>
void zero (cudaTask task, T *dst, size_t count)
 updates parameters of a memset task to a zero task
 
template<typename T, std::enable_if_t< is_pod_v< T > &&(sizeof(T)==1||sizeof(T)==2||sizeof(T)==4), void > * = nullptr>
void fill (cudaTask task, T *dst, T value, size_t count)
 updates parameters of a memset task to a fill task
 
template<typename T, std::enable_if_t<!std::is_same_v< T, void >, void > * = nullptr>
void copy (cudaTask task, T *tgt, const T *src, size_t num)
 updates parameters of a memcpy task to a copy task
 
template<typename C>
void single_task (cudaTask task, C c)
 updates a single-threaded kernel task
 
template<typename I, typename C, typename E = cudaDefaultExecutionPolicy>
void for_each (cudaTask task, I first, I last, C callable)
 updates parameters of a for_each kernel task created from the CUDA graph of *this
 
template<typename I, typename C, typename E = cudaDefaultExecutionPolicy>
void for_each_index (cudaTask task, I first, I last, I step, C callable)
 updates parameters of a for_each_index kernel task created from the CUDA graph of *this
 
template<typename I, typename O, typename C, typename E = cudaDefaultExecutionPolicy>
void transform (cudaTask task, I first, I last, O output, C c)
 updates parameters of a transform kernel task created from the CUDA graph of *this
 
template<typename I1, typename I2, typename O, typename C, typename E = cudaDefaultExecutionPolicy>
void transform (cudaTask task, I1 first1, I1 last1, I2 first2, O output, C c)
 updates parameters of a transform kernel task created from the CUDA graph of *this
 

Detailed Description

template<typename Creator, typename Deleter>
class tf::cudaGraphExecBase< Creator, Deleter >

class to create an executable CUDA graph with unique ownership

Template Parameters
Creator: functor to create the executable CUDA graph (used in constructor)
Deleter: functor to delete the executable CUDA graph (used in destructor)

This class wraps a cudaGraphExec_t handle with std::unique_ptr to ensure proper resource management and automatic cleanup.

Constructor & Destructor Documentation

◆ cudaGraphExecBase()

template<typename Creator, typename Deleter>
template<typename... ArgsT>
tf::cudaGraphExecBase< Creator, Deleter >::cudaGraphExecBase ( ArgsT &&... args)
inline explicit

constructs a cudaGraphExec object by passing the given arguments to the executable CUDA graph creator

Constructs a cudaGraphExec object by passing the given arguments to the executable CUDA graph creator

Parameters
args: arguments to pass to the executable CUDA graph creator

Member Function Documentation

◆ copy()

template<typename Creator, typename Deleter>
template<typename T, std::enable_if_t<!std::is_same_v< T, void >, void > *>
void tf::cudaGraphExecBase< Creator, Deleter >::copy ( cudaTask task,
T * tgt,
const T * src,
size_t num )

updates parameters of a memcpy task to a copy task

The method is similar to tf::cudaFlow::copy but operates on a task of type tf::cudaTaskType::MEMCPY. The source/destination memory may have different address values but must be allocated from the same contexts as the original source/destination memory.

◆ fill()

template<typename Creator, typename Deleter>
template<typename T, std::enable_if_t< is_pod_v< T > &&(sizeof(T)==1||sizeof(T)==2||sizeof(T)==4), void > *>
void tf::cudaGraphExecBase< Creator, Deleter >::fill ( cudaTask task,
T * dst,
T value,
size_t count )

updates parameters of a memset task to a fill task

The method is similar to tf::cudaFlow::fill but operates on a task of type tf::cudaTaskType::MEMSET.

The source/destination memory may have different address values but must be allocated from the same contexts as the original source/destination memory.

◆ host()

template<typename Creator, typename Deleter>
template<typename C>
void tf::cudaGraphExecBase< Creator, Deleter >::host ( cudaTask task,
C && callable,
void * user_data )

updates parameters of a host task

This method updates the parameters of the given host task (similar to tf::cudaFlow::host).

◆ kernel()

template<typename Creator, typename Deleter>
template<typename F, typename... ArgsT>
void tf::cudaGraphExecBase< Creator, Deleter >::kernel ( cudaTask task,
dim3 g,
dim3 b,
size_t shm,
F f,
ArgsT... args )

updates parameters of a kernel task

The method is similar to tf::cudaFlow::kernel but operates on a task of type tf::cudaTaskType::KERNEL. The kernel function name must NOT change.

◆ memcpy()

template<typename Creator, typename Deleter>
void tf::cudaGraphExecBase< Creator, Deleter >::memcpy ( cudaTask task,
void * tgt,
const void * src,
size_t bytes )

updates parameters of a memcpy task

The method is similar to tf::cudaFlow::memcpy but operates on a task of type tf::cudaTaskType::MEMCPY. The source/destination memory may have different address values but must be allocated from the same contexts as the original source/destination memory.

◆ memset()

template<typename Creator, typename Deleter>
void tf::cudaGraphExecBase< Creator, Deleter >::memset ( cudaTask task,
void * dst,
int ch,
size_t count )

updates parameters of a memset task

The method is similar to tf::cudaFlow::memset but operates on a task of type tf::cudaTaskType::MEMSET. The source/destination memory may have different address values but must be allocated from the same contexts as the original source/destination memory.

◆ single_task()

template<typename Creator, typename Deleter>
template<typename C>
void tf::cudaGraphExecBase< Creator, Deleter >::single_task ( cudaTask task,
C c )

updates a single-threaded kernel task

This method is similar to tf::cudaFlow::single_task but operates on an existing task.

◆ zero()

template<typename Creator, typename Deleter>
template<typename T, std::enable_if_t< is_pod_v< T > &&(sizeof(T)==1||sizeof(T)==2||sizeof(T)==4), void > *>
void tf::cudaGraphExecBase< Creator, Deleter >::zero ( cudaTask task,
T * dst,
size_t count )

updates parameters of a memset task to a zero task

The method is similar to tf::cudaFlow::zero but operates on a task of type tf::cudaTaskType::MEMSET.

The source/destination memory may have different address values but must be allocated from the same contexts as the original source/destination memory.


The documentation for this class was generated from the following files: