// NOTE(review): this listing is a lossy extraction. The leading integers are
// line numbers of the original file, and several original lines (the header
// of the enclosing type, the constructor and destructor signatures, closing
// braces) are not present here.
//
// Includes, followed by fragments of the CUDA event wrapper type: presumably
// CudaEventWrapper, the type EventCreateCUDA instantiates (confirm against
// the full file). The visible constructor code starts with no stream
// attached, copies the device id from the DeviceOption and starts in the
// EVENT_INITIALIZED state.
1 #include "caffe2/core/context_gpu.h" 2 #include "caffe2/core/event_cpu.h" 3 #include "caffe2/core/operator.h" 11 : cuda_stream_(
nullptr),
12 device_id_(option.device_id()),
13 status_(EventStatus::EVENT_INITIALIZED) {
// NOTE(review): this is the two-argument form of CAFFE_ENFORCE. If the macro
// treats its first argument as the condition and the remaining arguments as
// message text, this only checks that device_type() is non-zero instead of
// comparing it with PROTO_CUDA; CAFFE_ENFORCE_EQ may have been intended.
// Confirm against the macro definition before changing.
14 CAFFE_ENFORCE(option.device_type(), PROTO_CUDA);
// Create the underlying CUDA event with the cudaEventDisableTiming flag set.
16 CUDA_ENFORCE(cudaEventCreateWithFlags(
17 &cuda_event_, cudaEventDefault | cudaEventDisableTiming));
// Presumably the destructor body (its signature is not visible in this
// excerpt): destroys the CUDA event created above.
21 CUDA_CHECK(cudaEventDestroy(cuda_event_));
// Handle of the CUDA event created in the constructor.
24 cudaEvent_t cuda_event_;
// Stream the event was recorded on; nullptr until EventRecordCUDA stores the
// context's stream, and reset to nullptr by EventResetCUDA.
25 cudaStream_t cuda_stream_;
// Current EventStatus value, stored as an atomic int.
28 std::atomic<int> status_;
// Mutex locked by the record / set-finished / reset functions and by the
// waiters that block on cv_recorded_.
29 std::mutex mutex_recorded_;
// Notified (notify_all) by EventRecordCUDA and EventSetFinishedCUDA; waiters
// block on it while status_ is still EVENT_INITIALIZED.
30 std::condition_variable cv_recorded_;
// Constant holding the text "No error". Its use is not visible in this
// excerpt; presumably it is the message reported for events that have not
// failed (confirm against EventErrorMessageCUDA in the full file).
35 const std::string kNoError =
"No error";
// Creates the CUDA-specific state for `event`: constructs a CudaEventWrapper
// from `option` and stores it (as a shared_ptr) in event->event_.
//
// option: device option forwarded to the CudaEventWrapper constructor.
// event:  event whose event_ member receives the new wrapper.
38 void EventCreateCUDA(
const DeviceOption& option,
Event* event) {
39 event->event_ = std::make_shared<CudaEventWrapper>(option);
// Records the CUDA event on the stream of the supplied context, or marks the
// event as failed with `err_msg`, then wakes every thread waiting on
// cv_recorded_.
//
// event:   event whose wrapper is updated.
// context: cast to `const CUDAContext*`; supplies the device id and stream.
// err_msg: text stored in the wrapper's err_msg_ on the failure path.
//
// NOTE(review): several original lines are missing from this excerpt (the
// `wrapper` declaration, the heads of the enforce statements and the
// conditional that chooses between the scheduled and failed paths).
// Presumably the choice is keyed on err_msg; confirm against the full file.
42 void EventRecordCUDA(
Event* event,
const void* context,
const char* err_msg) {
// Hold the wrapper mutex while the state is changed.
45 std::unique_lock<std::mutex> lock(wrapper->mutex_recorded_);
// Tail of a check involving EVENT_INITIALIZED; judging by the message it
// rejects recording an event more than once.
58 EventStatus::EVENT_INITIALIZED,
59 "Calling Record multiple times");
// Tail of a check that the current device matches the event's device.
69 "When you call EventRecordCUDA, your current device should be the same " 70 "as the device specified by the event.");
73 static_cast<const CUDAContext*>(context)->device_id());
// Record the event on the context's stream and remember that stream so that
// EventWaitCUDACUDA can compare against it later.
74 CUDA_ENFORCE(cudaEventRecord(
76 static_cast<const CUDAContext*>(context)->cuda_stream()));
77 wrapper->cuda_stream_ =
78 static_cast<const CUDAContext*
>(context)->cuda_stream();
79 wrapper->status_ = EventStatus::EVENT_SCHEDULED;
// Failure path: keep the message and mark the event as failed.
81 wrapper->err_msg_ = err_msg;
82 wrapper->status_ = EventStatus::EVENT_FAILED;
// Wake threads blocked in the wait loops on cv_recorded_.
85 wrapper->cv_recorded_.notify_all();
// Blocks until the event has left the EVENT_INITIALIZED state and, if it is
// EVENT_SCHEDULED, synchronizes on the CUDA event and updates the status to
// EVENT_SUCCESS or EVENT_FAILED accordingly.
//
// NOTE(review): the scoping braces are not visible in this excerpt. Two
// locals named `lock` appear below, so the first one presumably lives in an
// inner scope that ends before the synchronize call; confirm.
88 void EventFinishCUDA(
const Event* event) {
// Wait for the event to be recorded (or marked finished).
91 std::unique_lock<std::mutex> lock(wrapper->mutex_recorded_);
92 while (wrapper->status_ == EventStatus::EVENT_INITIALIZED) {
93 wrapper->cv_recorded_.wait(lock);
97 if (wrapper->status_ == EventStatus::EVENT_SCHEDULED) {
// Wait on the host for the recorded CUDA event.
100 auto cudaResult = cudaEventSynchronize(wrapper->cuda_event_);
101 if (cudaResult == cudaSuccess) {
102 wrapper->status_ = EventStatus::EVENT_SUCCESS;
// Otherwise store the CUDA error string under the mutex and mark the event
// as failed.
104 const auto& err_msg = cudaGetErrorString(cudaResult);
106 std::unique_lock<std::mutex> lock(wrapper->mutex_recorded_);
107 wrapper->err_msg_ = err_msg;
108 wrapper->status_ = EventStatus::EVENT_FAILED;
// Makes a CUDA context wait for a CUDA event.
//
// event:   event to wait for.
// context: cast to `CUDAContext*`; its stream is the one that waits.
//
// Blocks the calling thread until the event has left EVENT_INITIALIZED. If
// the event is EVENT_SCHEDULED and was recorded on a different stream than
// the context's, cudaStreamWaitEvent is issued on the context's stream; when
// both streams are the same, no wait is issued in the visible code.
114 void EventWaitCUDACUDA(
const Event* event,
void* context) {
// Wait for the event to be recorded (or marked finished).
117 std::unique_lock<std::mutex> lock(wrapper->mutex_recorded_);
118 while (wrapper->status_ == EventStatus::EVENT_INITIALIZED) {
119 wrapper->cv_recorded_.wait(lock);
123 if (wrapper->status_ == EventStatus::EVENT_SCHEDULED) {
125 auto context_stream =
static_cast<CUDAContext*
>(context)->cuda_stream();
126 auto event_stream = wrapper->cuda_stream_;
// Only a different stream needs an explicit wait on the event.
127 if (context_stream != event_stream) {
131 CUDA_CHECK(cudaStreamWaitEvent(context_stream, wrapper->cuda_event_, 0));
// Wait handler registered for the (CPU, CUDA) and (MKLDNN, CUDA) pairs.
// Delegates to EventFinishCUDA(event); `context` is not used in the visible
// code.
137 void EventWaitCPUCUDA(
const Event* event,
void* context) {
138 EventFinishCUDA(event);
// Wait handler registered for the (CUDA, CPU) and (CUDA, MKLDNN) pairs.
// NOTE(review): the function body is not visible in this excerpt, so its
// behavior cannot be described from here; see the full file.
142 void EventWaitCUDACPU(
const Event* event,
void* context) {
// Returns the current status of the event without waiting for it.
//
// If the event is EVENT_SCHEDULED, the CUDA event is polled with
// cudaEventQuery: cudaSuccess moves the status to EVENT_SUCCESS,
// cudaErrorNotReady leaves it unchanged, and any other result stores the
// CUDA error string (under the mutex) and moves the status to EVENT_FAILED.
// The value of status_ after this update is returned as an EventStatus.
146 EventStatus EventQueryCUDA(
const Event* event) {
148 if (wrapper->status_ == EventStatus::EVENT_SCHEDULED) {
149 auto cudaResult = cudaEventQuery(wrapper->cuda_event_);
150 if (cudaResult == cudaSuccess) {
151 wrapper->status_ = EventStatus::EVENT_SUCCESS;
152 }
// cudaErrorNotReady is not treated as a failure here.
else if (cudaResult != cudaErrorNotReady) {
153 const auto& err_msg = cudaGetErrorString(cudaResult);
155 std::unique_lock<std::mutex> lock(wrapper->mutex_recorded_);
156 wrapper->err_msg_ = err_msg;
157 wrapper->status_ = EventStatus::EVENT_FAILED;
160 return static_cast<EventStatus
>(wrapper->status_.load());
// Returns the error message of the event: a reference to the wrapper's
// err_msg_ when the status is EVENT_FAILED.
// NOTE(review): the return for the non-failed case is not visible in this
// excerpt; presumably it returns kNoError (confirm). The visible code reads
// err_msg_ without taking mutex_recorded_.
163 const std::string& EventErrorMessageCUDA(
const Event* event) {
166 if (wrapper->status_ == EventStatus::EVENT_FAILED) {
167 return wrapper->err_msg_;
// Marks the event as finished without recording it on a stream: the status
// becomes EVENT_SUCCESS, or EVENT_FAILED with `err_msg` stored, and every
// thread waiting on cv_recorded_ is woken.
//
// event:   event to finish; its wrapper is modified although the parameter is
//          a pointer to const Event.
// err_msg: text stored in the wrapper's err_msg_ on the failure path.
//
// NOTE(review): the conditional that chooses between the success and failure
// paths is not visible in this excerpt; presumably it tests err_msg. Confirm.
173 void EventSetFinishedCUDA(
const Event* event,
const char* err_msg) {
// Hold the wrapper mutex while the state is changed.
176 std::unique_lock<std::mutex> lock(wrapper->mutex_recorded_);
// Tail of a check involving EVENT_INITIALIZED; judging by the message it
// rejects events that have already been recorded.
180 EventStatus::EVENT_INITIALIZED,
181 "Calling SetFinished on recorded CUDA event");
184 wrapper->status_ = EventStatus::EVENT_SUCCESS;
186 wrapper->err_msg_ = err_msg;
187 wrapper->status_ = EventStatus::EVENT_FAILED;
// Wake threads blocked in the wait loops on cv_recorded_.
190 wrapper->cv_recorded_.notify_all();
// Returns the event to its initial state while holding the wrapper mutex:
// status EVENT_INITIALIZED, empty error message and no associated stream.
// The CUDA event handle itself is not touched in the visible code.
193 void EventResetCUDA(
Event* event) {
195 std::unique_lock<std::mutex> lock(wrapper->mutex_recorded_);
196 wrapper->status_ = EventStatus::EVENT_INITIALIZED;
197 wrapper->err_msg_ =
"";
198 wrapper->cuda_stream_ =
nullptr;
// Registers the functions above as the CUDA implementations of the event
// operations (create, record, wait, finish, query, error message, set
// finished, reset).
201 REGISTER_EVENT_CREATE_FUNCTION(CUDA, EventCreateCUDA);
202 REGISTER_EVENT_RECORD_FUNCTION(CUDA, EventRecordCUDA);
// Wait handlers are registered per pair of device types. Judging by the
// handler names, the first argument is the waiting side and the second the
// type of the event being waited on (confirm against the macro definition).
203 REGISTER_EVENT_WAIT_FUNCTION(CUDA, CUDA, EventWaitCUDACUDA);
204 REGISTER_EVENT_WAIT_FUNCTION(CPU, CUDA, EventWaitCPUCUDA);
205 REGISTER_EVENT_WAIT_FUNCTION(CUDA, CPU, EventWaitCUDACPU);
206 REGISTER_EVENT_FINISH_FUNCTION(CUDA, EventFinishCUDA);
208 REGISTER_EVENT_QUERY_FUNCTION(CUDA, EventQueryCUDA);
209 REGISTER_EVENT_ERROR_MESSAGE_FUNCTION(CUDA, EventErrorMessageCUDA);
210 REGISTER_EVENT_SET_FINISHED_FUNCTION(CUDA, EventSetFinishedCUDA);
211 REGISTER_EVENT_RESET_FUNCTION(CUDA, EventResetCUDA);
// MKLDNN reuses the CPU wait handlers in both directions.
213 REGISTER_EVENT_WAIT_FUNCTION(MKLDNN, CUDA, EventWaitCPUCUDA);
214 REGISTER_EVENT_WAIT_FUNCTION(CUDA, MKLDNN, EventWaitCUDACPU);
int CaffeCudaGetDevice()
Gets the current GPU id.
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
A variant of DeviceGuard that is specialized for CUDA.