3#include "cuda_error.hpp"
17 TF_CHECK_CUDA(cudaGetDeviceCount(&N),
"failed to get device count");
18 return static_cast<size_t>(N);
26 TF_CHECK_CUDA(cudaGetDevice(&
id),
"failed to get current device id");
34 TF_CHECK_CUDA(cudaSetDevice(
id),
"failed to switch to device ",
id);
42 cudaGetDeviceProperties(&p, i),
"failed to get property of device ", i
52 cudaGetDeviceProperties(&p, i),
"failed to get property of device ", i
62 os <<
"Major revision number: " << p.major <<
'\n'
63 <<
"Minor revision number: " << p.minor <<
'\n'
64 <<
"Name: " << p.name <<
'\n'
65 <<
"Total global memory: " << p.totalGlobalMem <<
'\n'
66 <<
"Total shared memory per block: " << p.sharedMemPerBlock <<
'\n'
67 <<
"Total registers per block: " << p.regsPerBlock <<
'\n'
68 <<
"Warp size: " << p.warpSize <<
'\n'
69 <<
"Maximum memory pitch: " << p.memPitch <<
'\n'
70 <<
"Maximum threads per block: " << p.maxThreadsPerBlock <<
'\n';
72 os <<
"Maximum dimension of block: ";
73 for (
int i = 0; i < 3; ++i) {
75 os << p.maxThreadsDim[i];
79 os <<
"Maximum dimension of grid: ";
80 for (
int i = 0; i < 3; ++i) {
82 os << p.maxGridSize[i];;
85 os <<
"Total constant memory: " << p.totalConstMem <<
'\n'
86 <<
"Texture alignment: " << p.textureAlignment <<
'\n'
87 <<
"Number of multiprocessors: " << p.multiProcessorCount <<
'\n'
88 <<
"GPU sharing Host Memory: " << p.integrated <<
'\n'
89 <<
"Host page-locked mem mapping: " << p.canMapHostMemory <<
'\n'
90 <<
"Alignment for Surfaces: " << p.surfaceAlignment <<
'\n'
91 <<
"Device has ECC support: " << p.ECCEnabled <<
'\n'
92 <<
"Unified Addressing (UVA): " << p.unifiedAddressing <<
'\n';
101 cudaDeviceGetAttribute(&threads, cudaDevAttrMaxThreadsPerBlock, d),
102 "failed to query the maximum threads per block on device ", d
113 cudaDeviceGetAttribute(&dim, cudaDevAttrMaxBlockDimX, d),
114 "failed to query the maximum x-dimension per block on device ", d
125 cudaDeviceGetAttribute(&dim, cudaDevAttrMaxBlockDimY, d),
126 "failed to query the maximum y-dimension per block on device ", d
137 cudaDeviceGetAttribute(&dim, cudaDevAttrMaxBlockDimZ, d),
138 "failed to query the maximum z-dimension per block on device ", d
149 cudaDeviceGetAttribute(&dim, cudaDevAttrMaxGridDimX, d),
150 "failed to query the maximum x-dimension per grid on device ", d
161 cudaDeviceGetAttribute(&dim, cudaDevAttrMaxGridDimY, d),
162 "failed to query the maximum y-dimension per grid on device ", d
173 cudaDeviceGetAttribute(&dim, cudaDevAttrMaxGridDimZ, d),
174 "failed to query the maximum z-dimension per grid on device ", d
185 cudaDeviceGetAttribute(&num, cudaDevAttrMaxSharedMemoryPerBlock, d),
186 "failed to query the maximum shared memory per block on device ", d
197 cudaDeviceGetAttribute(&num, cudaDevAttrWarpSize, d),
198 "failed to query the warp size per block on device ", d
209 cudaDeviceGetAttribute(&num, cudaDevAttrComputeCapabilityMajor, d),
210 "failed to query the major number of compute capability of device ", d
221 cudaDeviceGetAttribute(&num, cudaDevAttrComputeCapabilityMinor, d),
222 "failed to query the minor number of compute capability of device ", d
233 cudaDeviceGetAttribute(&num, cudaDevAttrUnifiedAddressing, d),
234 "failed to query unified addressing status on device ", d
249 cudaDriverGetVersion(&num),
250 "failed to query the latest cuda version supported by the driver"
261 cudaRuntimeGetVersion(&num),
"failed to query cuda runtime version"
316 TF_CHECK_CUDA(cudaGetDevice(&_p),
"failed to get current device scope");
321 TF_CHECK_CUDA(cudaSetDevice(dev),
"failed to scope on device ", dev);
cudaScopedDevice(int device)
constructs an RAII-style device switcher
Definition cuda_device.hpp:315
~cudaScopedDevice()
destructs the guard and switches back to the previous device context
Definition cuda_device.hpp:326
taskflow namespace
Definition small_vector.hpp:20
size_t cuda_get_device_max_z_dim_per_grid(int d)
queries the maximum z-dimension per grid on a device
Definition cuda_device.hpp:170
int cuda_get_device_compute_capability_major(int d)
queries the major number of compute capability of a device
Definition cuda_device.hpp:206
int cuda_get_device()
gets the current device associated with the calling thread
Definition cuda_device.hpp:24
int cuda_get_runtime_version()
queries the CUDA Runtime version (1000 * major + 10 * minor)
Definition cuda_device.hpp:258
void cuda_get_device_property(int i, cudaDeviceProp &p)
obtains the device property
Definition cuda_device.hpp:40
int cuda_get_driver_version()
queries the latest CUDA version (1000 * major + 10 * minor) supported by the driver
Definition cuda_device.hpp:246
size_t cuda_get_device_max_z_dim_per_block(int d)
queries the maximum z-dimension per block on a device
Definition cuda_device.hpp:134
size_t cuda_get_device_max_x_dim_per_grid(int d)
queries the maximum x-dimension per grid on a device
Definition cuda_device.hpp:146
int cuda_get_device_compute_capability_minor(int d)
queries the minor number of compute capability of a device
Definition cuda_device.hpp:218
size_t cuda_get_device_max_y_dim_per_grid(int d)
queries the maximum y-dimension per grid on a device
Definition cuda_device.hpp:158
size_t cuda_get_device_max_y_dim_per_block(int d)
queries the maximum y-dimension per block on a device
Definition cuda_device.hpp:122
size_t cuda_get_device_max_threads_per_block(int d)
queries the maximum threads per block on a device
Definition cuda_device.hpp:98
size_t cuda_get_num_devices()
queries the number of available devices
Definition cuda_device.hpp:15
bool cuda_get_device_unified_addressing(int d)
queries whether the device supports unified addressing
Definition cuda_device.hpp:230
void cuda_set_device(int id)
switches to a given device context
Definition cuda_device.hpp:33
size_t cuda_get_device_warp_size(int d)
queries the warp size on a device
Definition cuda_device.hpp:194
size_t cuda_get_device_max_shm_per_block(int d)
queries the maximum shared memory size in bytes per block on a device
Definition cuda_device.hpp:182
size_t cuda_get_device_max_x_dim_per_block(int d)
queries the maximum x-dimension per block on a device
Definition cuda_device.hpp:110
void cuda_dump_device_property(std::ostream &os, const cudaDeviceProp &p)
dumps the device property
Definition cuda_device.hpp:60