Caffe2 - C++ API
A deep learning, cross platform ML framework
profiler_cuda.cpp
1 #include <torch/csrc/autograd/profiler.h>
2 #include <c10/cuda/CUDAGuard.h>
3 #include <nvToolsExt.h>
4 
5 #include <sstream>
6 
7 namespace torch { namespace autograd { namespace profiler {
8 
9 namespace {
10 
11 static inline void cudaCheck(cudaError_t result, const char * file, int line) {
12  if(result != cudaSuccess) {
13  std::stringstream ss;
14  ss << file << ":" << line << ": " << cudaGetErrorString(result);
15  throw std::runtime_error(ss.str());
16  }
17 }
// Evaluates a CUDA runtime call and forwards its status to cudaCheck together
// with the call site's file and line, so a failure surfaces as an exception.
// The expansion intentionally carries no trailing semicolon: the call site
// supplies it, which keeps the macro a single statement and lets it be used
// safely in an unbraced if/else.
#define TORCH_CUDA_CHECK(result) cudaCheck((result), __FILE__, __LINE__)
19 
20 struct CUDAMethods : public CUDAStubs {
21  void record(int* device, CUDAEventStub* event, int64_t* cpu_ns) override {
22  TORCH_CUDA_CHECK(cudaGetDevice(device));
23  TORCH_CUDA_CHECK(cudaEventCreate(event));
24  auto stream = at::cuda::getCurrentCUDAStream();
25  *cpu_ns = getTime();
26  TORCH_CUDA_CHECK(cudaEventRecord(*event, stream));
27  }
28  float elapsed(CUDAEventStub event, CUDAEventStub event2) override {
29  TORCH_CUDA_CHECK(cudaEventSynchronize(event));
30  TORCH_CUDA_CHECK(cudaEventSynchronize(event2));
31  float ms;
32  TORCH_CUDA_CHECK(cudaEventElapsedTime(&ms, event, event2));
33  return ms*1000.0;
34  }
35  void nvtxMarkA(const char* name) override {
36  ::nvtxMark(name);
37  }
38  void nvtxRangePushA(const char* name) override {
39  ::nvtxRangePushA(name);
40  }
41  void nvtxRangePop() override {
42  ::nvtxRangePop();
43  }
44  void onEachDevice(std::function<void(int)> op) override {
45  at::cuda::OptionalCUDAGuard device_guard;
46  int count;
47  TORCH_CUDA_CHECK(cudaGetDeviceCount(&count));
48  for(int i = 0; i < count; i++) {
49  device_guard.set_index(i);
50  op(i);
51  }
52  }
53  void synchronize() override {
54  cudaDeviceSynchronize();
55  }
56  bool enabled() override {
57  return true;
58  }
59 
60 };
61 
62 struct RegisterCUDAMethods {
63  RegisterCUDAMethods() {
64  static CUDAMethods methods;
65  registerCUDAMethods(&methods);
66  }
67 };
68 RegisterCUDAMethods reg;
69 
70 } // namespaces
71 } // namespace profiler
72 } // namespace autograd
73 } // namespace torch
A variant of OptionalDeviceGuard that is specialized for CUDA.
Definition: CUDAGuard.h:65
Definition: jit_type.h:17
void set_index(DeviceIndex device_index)
Sets the CUDA device to the given device index, initializing the guard if it is not already initialized.
Definition: CUDAGuard.h:97