1 #include "caffe2/core/common_gpu.h" 8 #include "caffe2/core/asan.h" 9 #include "caffe2/core/common.h" 10 #include "caffe2/core/init.h" 11 #include "caffe2/core/logging.h" 16 if (getenv(
"CAFFE2_DEBUG_CUDA_INIT_ORDER")) {
17 static bool first =
true;
20 std::cerr <<
"DEBUG: caffe2::NumCudaDevices() invoked for the first time" 24 static int count = -1;
26 auto err = cudaGetDeviceCount(&count);
31 case cudaErrorNoDevice:
34 case cudaErrorInsufficientDriver:
35 LOG(WARNING) <<
"Insufficient cuda driver. Cannot use cuda.";
38 case cudaErrorInitializationError:
39 LOG(WARNING) <<
"Cuda driver initialization failed, you might not " 43 case cudaErrorUnknown:
44 LOG(ERROR) <<
"Found an unknown error - this may be due to an " 45 "incorrectly set up environment, e.g. changing env " 46 "variable CUDA_VISIBLE_DEVICES after program start. " 47 "I will set the available devices to be zero.";
50 case cudaErrorMemoryAllocation:
51 #if CAFFE2_ASAN_ENABLED 54 LOG(ERROR) <<
"It is known that CUDA does not work well with ASAN. As " 55 "a result we will simply shut down CUDA support. If you " 56 "would like to use GPUs, turn off ASAN.";
59 #else // CAFFE2_ASAN_ENABLED 62 LOG(FATAL) <<
"Unexpected error from cudaGetDeviceCount(). Did you run " 63 "some cuda functions before calling NumCudaDevices() " 64 "that might have already set an error? Error: " 67 #endif // CAFFE2_ASAN_ENABLED 69 LOG(FATAL) <<
"Unexpected error from cudaGetDeviceCount(). Did you run " 70 "some cuda functions before calling NumCudaDevices() " 71 "that might have already set an error? Error: " 79 int gDefaultGPUID = 0;
82 void SetDefaultGPUID(
const int deviceid) {
86 "The default gpu id should be smaller than the number of gpus " 91 gDefaultGPUID = deviceid;
// Returns the value currently held in the global gDefaultGPUID, which is
// initialized to 0 and overwritten by SetDefaultGPUID().
94 int GetDefaultGPUID() {
return gDefaultGPUID; }
98 CUDA_ENFORCE(cudaGetDevice(&gpu_id));
103 CUDA_ENFORCE(cudaSetDevice(
id));
107 cudaPointerAttributes attr;
108 cudaError_t err = cudaPointerGetAttributes(&attr, ptr);
110 if (err == cudaErrorInvalidValue) {
114 err = cudaGetLastError();
115 CHECK(err == cudaErrorInvalidValue);
122 if (attr.CAFFE2_CUDA_PTRATTR_MEMTYPE == cudaMemoryTypeHost) {
132 CUDA_ENFORCE(cudaGetDeviceProperties(&props[i], i));
136 vector<cudaDeviceProp> props;
148 "The gpu id should be smaller than the number of gpus ",
153 return props.props[deviceid];
158 std::stringstream ss;
160 ss <<
"Device id: " << device << std::endl;
161 ss <<
"Major revision number: " << prop.major << std::endl;
162 ss <<
"Minor revision number: " << prop.minor << std::endl;
163 ss <<
"Name: " << prop.name << std::endl;
164 ss <<
"Total global memory: " << prop.totalGlobalMem << std::endl;
165 ss <<
"Total shared memory per block: " << prop.sharedMemPerBlock
167 ss <<
"Total registers per block: " << prop.regsPerBlock << std::endl;
168 ss <<
"Warp size: " << prop.warpSize << std::endl;
169 #ifndef __HIP_PLATFORM_HCC__ 170 ss <<
"Maximum memory pitch: " << prop.memPitch << std::endl;
172 ss <<
"Maximum threads per block: " << prop.maxThreadsPerBlock
174 ss <<
"Maximum dimension of block: " 175 << prop.maxThreadsDim[0] <<
", " << prop.maxThreadsDim[1] <<
", " 176 << prop.maxThreadsDim[2] << std::endl;
177 ss <<
"Maximum dimension of grid: " 178 << prop.maxGridSize[0] <<
", " << prop.maxGridSize[1] <<
", " 179 << prop.maxGridSize[2] << std::endl;
180 ss <<
"Clock rate: " << prop.clockRate << std::endl;
181 ss <<
"Total constant memory: " << prop.totalConstMem << std::endl;
182 #ifndef __HIP_PLATFORM_HCC__ 183 ss <<
"Texture alignment: " << prop.textureAlignment << std::endl;
184 ss <<
"Concurrent copy and execution: " 185 << (prop.deviceOverlap ?
"Yes" :
"No") << std::endl;
187 ss <<
"Number of multiprocessors: " << prop.multiProcessorCount
189 #ifndef __HIP_PLATFORM_HCC__ 190 ss <<
"Kernel execution timeout: " 191 << (prop.kernelExecTimeoutEnabled ?
"Yes" :
"No") << std::endl;
193 LOG(INFO) << ss.str();
197 bool GetCudaPeerAccessPattern(vector<vector<bool> >* pattern) {
199 if (cudaGetDeviceCount(&gpu_count) != cudaSuccess)
return false;
201 pattern->resize(gpu_count, vector<bool>(gpu_count,
false));
202 for (
int i = 0; i < gpu_count; ++i) {
203 for (
int j = 0; j < gpu_count; ++j) {
204 int can_access =
true;
206 if (cudaDeviceCanAccessPeer(&can_access, i, j)
211 (*pattern)[i][j] =
static_cast<bool>(can_access);
219 #if CUDA_VERSION < 9000 225 return prop.major >= 7;
231 case CUBLAS_STATUS_SUCCESS:
232 return "CUBLAS_STATUS_SUCCESS";
233 case CUBLAS_STATUS_NOT_INITIALIZED:
234 return "CUBLAS_STATUS_NOT_INITIALIZED";
235 case CUBLAS_STATUS_ALLOC_FAILED:
236 return "CUBLAS_STATUS_ALLOC_FAILED";
237 case CUBLAS_STATUS_INVALID_VALUE:
238 return "CUBLAS_STATUS_INVALID_VALUE";
239 case CUBLAS_STATUS_ARCH_MISMATCH:
240 return "CUBLAS_STATUS_ARCH_MISMATCH";
241 #ifndef __HIP_PLATFORM_HCC__ 242 case CUBLAS_STATUS_MAPPING_ERROR:
243 return "CUBLAS_STATUS_MAPPING_ERROR";
244 case CUBLAS_STATUS_EXECUTION_FAILED:
245 return "CUBLAS_STATUS_EXECUTION_FAILED";
247 case CUBLAS_STATUS_INTERNAL_ERROR:
248 return "CUBLAS_STATUS_INTERNAL_ERROR";
249 #if CUDA_VERSION >= 6000 250 case CUBLAS_STATUS_NOT_SUPPORTED:
251 return "CUBLAS_STATUS_NOT_SUPPORTED";
252 #if CUDA_VERSION >= 6050 253 case CUBLAS_STATUS_LICENSE_ERROR:
254 return "CUBLAS_STATUS_LICENSE_ERROR";
255 #endif // CUDA_VERSION >= 6050 256 #endif // CUDA_VERSION >= 6000 257 #ifdef __HIP_PLATFORM_HCC__ 258 case rocblas_status_invalid_size:
259 return "rocblas_status_invalid_size";
263 return "Unrecognized cublas error string";
268 case CURAND_STATUS_SUCCESS:
269 return "CURAND_STATUS_SUCCESS";
270 case CURAND_STATUS_VERSION_MISMATCH:
271 return "CURAND_STATUS_VERSION_MISMATCH";
272 case CURAND_STATUS_NOT_INITIALIZED:
273 return "CURAND_STATUS_NOT_INITIALIZED";
274 case CURAND_STATUS_ALLOCATION_FAILED:
275 return "CURAND_STATUS_ALLOCATION_FAILED";
276 case CURAND_STATUS_TYPE_ERROR:
277 return "CURAND_STATUS_TYPE_ERROR";
278 case CURAND_STATUS_OUT_OF_RANGE:
279 return "CURAND_STATUS_OUT_OF_RANGE";
280 case CURAND_STATUS_LENGTH_NOT_MULTIPLE:
281 return "CURAND_STATUS_LENGTH_NOT_MULTIPLE";
282 case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED:
283 return "CURAND_STATUS_DOUBLE_PRECISION_REQUIRED";
284 case CURAND_STATUS_LAUNCH_FAILURE:
285 return "CURAND_STATUS_LAUNCH_FAILURE";
286 case CURAND_STATUS_PREEXISTING_FAILURE:
287 return "CURAND_STATUS_PREEXISTING_FAILURE";
288 case CURAND_STATUS_INITIALIZATION_FAILED:
289 return "CURAND_STATUS_INITIALIZATION_FAILED";
290 case CURAND_STATUS_ARCH_MISMATCH:
291 return "CURAND_STATUS_ARCH_MISMATCH";
292 case CURAND_STATUS_INTERNAL_ERROR:
293 return "CURAND_STATUS_INTERNAL_ERROR";
294 #ifdef __HIP_PLATFORM_HCC__ 295 case HIPRAND_STATUS_NOT_IMPLEMENTED:
296 return "HIPRAND_STATUS_NOT_IMPLEMENTED";
300 return "Unrecognized curand error string";
306 class CudaRuntimeFlagFlipper {
308 CudaRuntimeFlagFlipper() {
309 internal::SetCudaRuntimeFlag();
312 static CudaRuntimeFlagFlipper g_flipper;
int CaffeCudaGetDevice()
Gets the current GPU id.
bool TensorCoreAvailable()
Return the availability of TensorCores for math.
const char * curandGetErrorString(curandStatus_t error)
Return a human readable curand error string.
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
int GetGPUIDForPointer(const void *ptr)
Gets the GPU id that the current pointer is located at.
const char * cublasGetErrorString(cublasStatus_t error)
Return a human readable cublas error string.
const cudaDeviceProp & GetDeviceProperty(const int deviceid)
Gets the device property for the given device.
void CaffeCudaSetDevice(const int id)
Sets the current GPU id.
int NumCudaDevices()
Returns the number of devices.
void DeviceQuery(const int device)
Runs a device query function and prints out the results to LOG(INFO).