Loading...
Searching...
No Matches
cuda_device.hpp
1#pragma once
2
3#include "cuda_error.hpp"
4
9
10namespace tf {
11
15inline size_t cuda_get_num_devices() {
16 int N = 0;
17 TF_CHECK_CUDA(cudaGetDeviceCount(&N), "failed to get device count");
18 return static_cast<size_t>(N);
19}
20
24inline int cuda_get_device() {
25 int id;
26 TF_CHECK_CUDA(cudaGetDevice(&id), "failed to get current device id");
27 return id;
28}
29
33inline void cuda_set_device(int id) {
34 TF_CHECK_CUDA(cudaSetDevice(id), "failed to switch to device ", id);
35}
36
40inline void cuda_get_device_property(int i, cudaDeviceProp& p) {
41 TF_CHECK_CUDA(
42 cudaGetDeviceProperties(&p, i), "failed to get property of device ", i
43 );
44}
45
49inline cudaDeviceProp cuda_get_device_property(int i) {
50 cudaDeviceProp p;
51 TF_CHECK_CUDA(
52 cudaGetDeviceProperties(&p, i), "failed to get property of device ", i
53 );
54 return p;
55}
56
60inline void cuda_dump_device_property(std::ostream& os, const cudaDeviceProp& p) {
61
62 os << "Major revision number: " << p.major << '\n'
63 << "Minor revision number: " << p.minor << '\n'
64 << "Name: " << p.name << '\n'
65 << "Total global memory: " << p.totalGlobalMem << '\n'
66 << "Total shared memory per block: " << p.sharedMemPerBlock << '\n'
67 << "Total registers per block: " << p.regsPerBlock << '\n'
68 << "Warp size: " << p.warpSize << '\n'
69 << "Maximum memory pitch: " << p.memPitch << '\n'
70 << "Maximum threads per block: " << p.maxThreadsPerBlock << '\n';
71
72 os << "Maximum dimension of block: ";
73 for (int i = 0; i < 3; ++i) {
74 if(i) os << 'x';
75 os << p.maxThreadsDim[i];
76 }
77 os << '\n';
78
79 os << "Maximum dimension of grid: ";
80 for (int i = 0; i < 3; ++i) {
81 if(i) os << 'x';
82 os << p.maxGridSize[i];;
83 }
84 os << '\n';
85 os << "Total constant memory: " << p.totalConstMem << '\n'
86 << "Texture alignment: " << p.textureAlignment << '\n'
87 << "Number of multiprocessors: " << p.multiProcessorCount << '\n'
88 << "GPU sharing Host Memory: " << p.integrated << '\n'
89 << "Host page-locked mem mapping: " << p.canMapHostMemory << '\n'
90 << "Alignment for Surfaces: " << p.surfaceAlignment << '\n'
91 << "Device has ECC support: " << p.ECCEnabled << '\n'
92 << "Unified Addressing (UVA): " << p.unifiedAddressing << '\n';
93}
94
99 int threads = 0;
100 TF_CHECK_CUDA(
101 cudaDeviceGetAttribute(&threads, cudaDevAttrMaxThreadsPerBlock, d),
102 "failed to query the maximum threads per block on device ", d
103 )
104 return threads;
105}
106
111 int dim = 0;
112 TF_CHECK_CUDA(
113 cudaDeviceGetAttribute(&dim, cudaDevAttrMaxBlockDimX, d),
114 "failed to query the maximum x-dimension per block on device ", d
115 )
116 return dim;
117}
118
123 int dim = 0;
124 TF_CHECK_CUDA(
125 cudaDeviceGetAttribute(&dim, cudaDevAttrMaxBlockDimY, d),
126 "failed to query the maximum y-dimension per block on device ", d
127 )
128 return dim;
129}
130
135 int dim = 0;
136 TF_CHECK_CUDA(
137 cudaDeviceGetAttribute(&dim, cudaDevAttrMaxBlockDimZ, d),
138 "failed to query the maximum z-dimension per block on device ", d
139 )
140 return dim;
141}
142
147 int dim = 0;
148 TF_CHECK_CUDA(
149 cudaDeviceGetAttribute(&dim, cudaDevAttrMaxGridDimX, d),
150 "failed to query the maximum x-dimension per grid on device ", d
151 )
152 return dim;
153}
154
159 int dim = 0;
160 TF_CHECK_CUDA(
161 cudaDeviceGetAttribute(&dim, cudaDevAttrMaxGridDimY, d),
162 "failed to query the maximum y-dimension per grid on device ", d
163 )
164 return dim;
165}
166
171 int dim = 0;
172 TF_CHECK_CUDA(
173 cudaDeviceGetAttribute(&dim, cudaDevAttrMaxGridDimZ, d),
174 "failed to query the maximum z-dimension per grid on device ", d
175 )
176 return dim;
177}
178
183 int num = 0;
184 TF_CHECK_CUDA(
185 cudaDeviceGetAttribute(&num, cudaDevAttrMaxSharedMemoryPerBlock, d),
186 "failed to query the maximum shared memory per block on device ", d
187 )
188 return num;
189}
190
194inline size_t cuda_get_device_warp_size(int d) {
195 int num = 0;
196 TF_CHECK_CUDA(
197 cudaDeviceGetAttribute(&num, cudaDevAttrWarpSize, d),
198 "failed to query the warp size per block on device ", d
199 )
200 return num;
201}
202
207 int num = 0;
208 TF_CHECK_CUDA(
209 cudaDeviceGetAttribute(&num, cudaDevAttrComputeCapabilityMajor, d),
210 "failed to query the major number of compute capability of device ", d
211 )
212 return num;
213}
214
219 int num = 0;
220 TF_CHECK_CUDA(
221 cudaDeviceGetAttribute(&num, cudaDevAttrComputeCapabilityMinor, d),
222 "failed to query the minor number of compute capability of device ", d
223 )
224 return num;
225}
226
231 int num = 0;
232 TF_CHECK_CUDA(
233 cudaDeviceGetAttribute(&num, cudaDevAttrUnifiedAddressing, d),
234 "failed to query unified addressing status on device ", d
235 )
236 return num;
237}
238
239// ----------------------------------------------------------------------------
240// CUDA Version
241// ----------------------------------------------------------------------------
242
247 int num = 0;
248 TF_CHECK_CUDA(
249 cudaDriverGetVersion(&num),
250 "failed to query the latest cuda version supported by the driver"
251 );
252 return num;
253}
254
259 int num = 0;
260 TF_CHECK_CUDA(
261 cudaRuntimeGetVersion(&num), "failed to query cuda runtime version"
262 );
263 return num;
264}
265
266// ----------------------------------------------------------------------------
267// cudaScopedDevice
268// ----------------------------------------------------------------------------
269
290
/**
@class cudaScopedDevice

@brief RAII-styled device switcher

The constructor switches the calling thread to the requested device and the
destructor switches back to the device that was current before, unless the
requested device was already the current one.
*/
class cudaScopedDevice {

  public:

    /**
    @brief constructs a RAII-styled device switcher

    @param device device context to scope to
    */
    explicit cudaScopedDevice(int device);

    /**
    @brief destructs the guard and switches back to the previous device context
    */
    ~cudaScopedDevice();

  private:

    // a guard always targets a device and is neither copyable nor movable
    cudaScopedDevice() = delete;
    cudaScopedDevice(const cudaScopedDevice&) = delete;
    cudaScopedDevice(cudaScopedDevice&&) = delete;

    // device that was current before the switch, or -1 when there is
    // nothing to restore
    int _p;
};
313
314// Constructor
316 TF_CHECK_CUDA(cudaGetDevice(&_p), "failed to get current device scope");
317 if(_p == dev) {
318 _p = -1;
319 }
320 else {
321 TF_CHECK_CUDA(cudaSetDevice(dev), "failed to scope on device ", dev);
322 }
323}
324
325// Destructor
327 if(_p != -1) {
328 cudaSetDevice(_p);
329 //TF_CHECK_CUDA(cudaSetDevice(_p), "failed to scope back to device ", _p);
330 }
331}
332
}  // end of namespace tf -----------------------------------------------------
334
335
336
337
338
cudaScopedDevice(int device)
constructs a RAII-styled device switcher
Definition cuda_device.hpp:315
~cudaScopedDevice()
destructs the guard and switches back to the previous device context
Definition cuda_device.hpp:326
taskflow namespace
Definition small_vector.hpp:20
size_t cuda_get_device_max_z_dim_per_grid(int d)
queries the maximum z-dimension per grid on a device
Definition cuda_device.hpp:170
int cuda_get_device_compute_capability_major(int d)
queries the major number of compute capability of a device
Definition cuda_device.hpp:206
int cuda_get_device()
gets the current device associated with the caller thread
Definition cuda_device.hpp:24
int cuda_get_runtime_version()
queries the CUDA Runtime version (1000 * major + 10 * minor)
Definition cuda_device.hpp:258
void cuda_get_device_property(int i, cudaDeviceProp &p)
obtains the device property
Definition cuda_device.hpp:40
int cuda_get_driver_version()
queries the latest CUDA version (1000 * major + 10 * minor) supported by the driver
Definition cuda_device.hpp:246
size_t cuda_get_device_max_z_dim_per_block(int d)
queries the maximum z-dimension per block on a device
Definition cuda_device.hpp:134
size_t cuda_get_device_max_x_dim_per_grid(int d)
queries the maximum x-dimension per grid on a device
Definition cuda_device.hpp:146
int cuda_get_device_compute_capability_minor(int d)
queries the minor number of compute capability of a device
Definition cuda_device.hpp:218
size_t cuda_get_device_max_y_dim_per_grid(int d)
queries the maximum y-dimension per grid on a device
Definition cuda_device.hpp:158
size_t cuda_get_device_max_y_dim_per_block(int d)
queries the maximum y-dimension per block on a device
Definition cuda_device.hpp:122
size_t cuda_get_device_max_threads_per_block(int d)
queries the maximum threads per block on a device
Definition cuda_device.hpp:98
size_t cuda_get_num_devices()
queries the number of available devices
Definition cuda_device.hpp:15
bool cuda_get_device_unified_addressing(int d)
queries if the device supports unified addressing
Definition cuda_device.hpp:230
void cuda_set_device(int id)
switches to a given device context
Definition cuda_device.hpp:33
size_t cuda_get_device_warp_size(int d)
queries the warp size on a device
Definition cuda_device.hpp:194
size_t cuda_get_device_max_shm_per_block(int d)
queries the maximum shared memory size in bytes per block on a device
Definition cuda_device.hpp:182
size_t cuda_get_device_max_x_dim_per_block(int d)
queries the maximum x-dimension per block on a device
Definition cuda_device.hpp:110
void cuda_dump_device_property(std::ostream &os, const cudaDeviceProp &p)
dumps the device property
Definition cuda_device.hpp:60