template<typename Creator, typename Deleter>
cudaGraphBase class
class to create a CUDA graph managed by a C++ smart pointer
Template parameters | |
---|---|
Creator | functor to create the graph (used in constructor) |
Deleter | functor to delete the graph (used in destructor) |
This class wraps a `cudaGraph_t` handle with `std::unique_ptr`, so the graph is released automatically through `Deleter`.
Derived classes
- class cudaFlow
- class to create a cudaFlow task dependency graph
Public types
- using base_type = std::unique_ptr<std::remove_pointer_t<cudaGraph_t>, Deleter>
- base std::unique_ptr type
Constructors, destructors, conversion operators
- template<typename... ArgsT>cudaGraphBase(ArgsT && ... args) explicit
- constructs a `cudaGraph` object by passing the given arguments to the executable CUDA graph creator
- operator cudaGraph_t() const noexcept
- implicit conversion to the underlying `cudaGraph_t` object
Public functions
- auto num_nodes() const -> size_t
- queries the number of nodes in a native CUDA graph
- auto num_edges() const -> size_t
- queries the number of edges in a native CUDA graph
- auto empty() const -> bool
- queries if the graph is empty
- void dump(std::ostream& os)
- dumps the CUDA graph to a DOT format through the given output stream
- auto noop() -> cudaTask
- creates a no-operation task
-
template<typename C>auto host(C&& callable, void* user_data) -> cudaTask
- creates a host task that runs a callable on the host
-
template<typename F, typename... ArgsT>auto kernel(dim3 g, dim3 b, size_t s, F f, ArgsT... args) -> cudaTask
- creates a kernel task
- auto memset(void* dst, int v, size_t count) -> cudaTask
- creates a memset task that fills untyped data with a byte value
- auto memcpy(void* tgt, const void* src, size_t bytes) -> cudaTask
- creates a memcpy task that copies untyped data in bytes
-
template<typename T, std::enable_if_t<is_pod_v<T> && (sizeof(T)==1||sizeof(T)==2||sizeof(T)==4), void>* = nullptr>auto zero(T* dst, size_t count) -> cudaTask
- creates a memset task that sets a typed memory block to zero
-
template<typename T, std::enable_if_t<is_pod_v<T> && (sizeof(T)==1||sizeof(T)==2||sizeof(T)==4), void>* = nullptr>auto fill(T* dst, T value, size_t count) -> cudaTask
- creates a memset task that fills a typed memory block with a value
-
template<typename T, std::enable_if_t<!std::is_same_v<T, void>, void>* = nullptr>auto copy(T* tgt, const T* src, size_t num) -> cudaTask
- creates a memcpy task that copies typed data
Function documentation
template<typename Creator, typename Deleter>
template<typename... ArgsT>
tf:: cudaGraphBase<Creator, Deleter>:: cudaGraphBase(ArgsT && ... args) explicit
constructs a cudaGraph
object by passing the given arguments to the executable CUDA graph creator
Parameters | |
---|---|
args | arguments to pass to the executable CUDA graph creator |
Constructs a cudaGraph
object by passing the given arguments to the executable CUDA graph creator
template<typename Creator, typename Deleter>
tf:: cudaGraphBase<Creator, Deleter>:: operator cudaGraph_t() const noexcept
implicit conversion to the underlying `cudaGraph_t` object
Returns the underlying `cudaGraph_t` object, equivalently calling `base_type::get()`.
template<typename Creator, typename Deleter>
void tf:: cudaGraphBase<Creator, Deleter>:: dump(std:: ostream& os)
dumps the CUDA graph to a DOT format through the given output stream
Parameters | |
---|---|
os | target output stream |
template<typename Creator, typename Deleter>
cudaTask tf:: cudaGraphBase<Creator, Deleter>:: noop()
creates a no-operation task
Returns | a tf::cudaTask handle |
---|---|

An empty node performs no operation during execution, but can be used for transitive ordering. For example, a phased execution graph with 2 groups of `n` nodes with a barrier between them can be represented using an empty node and `2*n` dependency edges, rather than no empty node and `n^2` dependency edges.
template<typename Creator, typename Deleter>
template<typename C>
cudaTask tf:: cudaGraphBase<Creator, Deleter>:: host(C&& callable,
void* user_data)
creates a host task that runs a callable on the host
Template parameters | |
---|---|
C | callable type |
Parameters | |
callable | a callable object with neither arguments nor return value (i.e., constructible from std::function<void()>) |
user_data | a pointer to the user data |
Returns | a tf::cudaTask handle |

A host task can only execute CPU-specific functions and cannot do any CUDA calls (e.g., `cudaMalloc`).
template<typename Creator, typename Deleter>
template<typename F, typename... ArgsT>
cudaTask tf:: cudaGraphBase<Creator, Deleter>:: kernel(dim3 g,
dim3 b,
size_t s,
F f,
ArgsT... args)
creates a kernel task
Template parameters | |
---|---|
F | kernel function type |
ArgsT | kernel function parameters type |
Parameters | |
g | configured grid |
b | configured block |
s | configured shared memory size in bytes |
f | kernel function |
args | arguments to forward to the kernel function by copy |
Returns | a tf::cudaTask handle |
template<typename Creator, typename Deleter>
cudaTask tf:: cudaGraphBase<Creator, Deleter>:: memset(void* dst,
int v,
size_t count)
creates a memset task that fills untyped data with a byte value
Parameters | |
---|---|
dst | pointer to the destination device memory area |
v | value to set for each byte of specified memory |
count | size in bytes to set |
Returns | a tf::cudaTask handle |

A memset task fills the first `count` bytes of the device memory area pointed to by `dst` with the byte value `v`.
template<typename Creator, typename Deleter>
cudaTask tf:: cudaGraphBase<Creator, Deleter>:: memcpy(void* tgt,
const void* src,
size_t bytes)
creates a memcpy task that copies untyped data in bytes
Parameters | |
---|---|
tgt | pointer to the target memory block |
src | pointer to the source memory block |
bytes | bytes to copy |
Returns | a tf::cudaTask handle |

A memcpy task transfers `bytes` of data from a source location to a target location. Direction can be arbitrary among CPUs and GPUs.
template<typename Creator, typename Deleter>
template<typename T, std::enable_if_t<is_pod_v<T> && (sizeof(T)==1||sizeof(T)==2||sizeof(T)==4), void>* = nullptr>
cudaTask tf:: cudaGraphBase<Creator, Deleter>:: zero(T* dst,
size_t count)
creates a memset task that sets a typed memory block to zero
Template parameters | |
---|---|
T | element type (size of T must be either 1, 2, or 4) |
Parameters | |
dst | pointer to the destination device memory area |
count | number of elements |
Returns | a tf::cudaTask handle |

A zero task zeroes the first `count` elements of type `T` in a device memory area pointed to by `dst`.
template<typename Creator, typename Deleter>
template<typename T, std::enable_if_t<is_pod_v<T> && (sizeof(T)==1||sizeof(T)==2||sizeof(T)==4), void>* = nullptr>
cudaTask tf:: cudaGraphBase<Creator, Deleter>:: fill(T* dst,
T value,
size_t count)
creates a memset task that fills a typed memory block with a value
Template parameters | |
---|---|
T | element type (size of T must be either 1, 2, or 4) |
Parameters | |
dst | pointer to the destination device memory area |
value | value to fill for each element of type T |
count | number of elements |
Returns | a tf::cudaTask handle |

A fill task fills the first `count` elements of type `T` with `value` in a device memory area pointed to by `dst`. The value to fill is interpreted in type `T` rather than byte.
template<typename Creator, typename Deleter>
template<typename T, std::enable_if_t<!std::is_same_v<T, void>, void>* = nullptr>
cudaTask tf:: cudaGraphBase<Creator, Deleter>:: copy(T* tgt,
const T* src,
size_t num)
creates a memcpy task that copies typed data
Template parameters | |
---|---|
T | element type (non-void) |
Parameters | |
tgt | pointer to the target memory block |
src | pointer to the source memory block |
num | number of elements to copy |
Returns | a tf::cudaTask handle |

A copy task transfers `num*sizeof(T)` bytes of data from a source location to a target location. Direction can be arbitrary among CPUs and GPUs.