Caffe2 - C++ API
A deep learning, cross platform ML framework
prof_dag_counters.cc
1 #include "caffe2/core/prof_dag_counters.h"
2 #include "caffe2/utils/string_utils.h"
3 
4 #include <ostream>
5 #include <sstream>
6 
7 namespace caffe2 {
8 
9 ProfDAGCounters::ProfDAGCounters(const std::shared_ptr<const NetDef>& net_def) {
10  report_.net_name_ = net_def->name();
11  report_.num_runs_ = 0;
12  auto num_ops = net_def->op_size();
13  report_.op_types_.reserve(num_ops);
14  report_.op_extra_info_.reserve(num_ops);
15 
16  for (auto op_id = 0; op_id < num_ops; ++op_id) {
17  report_.op_types_.push_back(net_def->op(op_id).type());
18  vector<std::string> op_extra_info;
19  if (net_def->op(op_id).has_device_option() &&
20  net_def->op(op_id).device_option().extra_info_size() > 0) {
21  for (auto i = 0; i < net_def->op(op_id).device_option().extra_info_size();
22  ++i) {
23  auto extra_info_str = net_def->op(op_id).device_option().extra_info(i);
24  op_extra_info.push_back(extra_info_str);
25  }
26  }
27  report_.op_extra_info_.push_back(op_extra_info);
28  }
29  report_.time_per_op_total_.resize(num_ops);
30 }
31 
32 void ProfDAGCounters::ReportRunStart() {
33  report_.num_runs_ += 1;
34  timer_.Start();
35  auto num_ops = report_.op_types_.size();
36  op_start_times_run_.clear();
37  op_start_times_run_.resize(num_ops, -1.0);
38  op_end_times_run_.clear();
39  op_end_times_run_.resize(num_ops, -1.0);
40  op_async_end_times_run_.clear();
41  op_async_end_times_run_.resize(num_ops, -1.0);
42 }
43 
44 void ProfDAGCounters::AddPerOpStartTime(size_t op_id) {
45  if (report_.num_runs_ <= 1) {
46  return;
47  }
48 
49  CAFFE_ENFORCE(op_id >= 0 && op_id < op_start_times_run_.size());
50  op_start_times_run_[op_id] = timer_.MilliSeconds();
51 }
52 
53 void ProfDAGCounters::AddPerOpEndTime(size_t op_id) {
54  if (report_.num_runs_ <= 1) {
55  return;
56  }
57 
58  CAFFE_ENFORCE(op_id >= 0 && op_id < op_end_times_run_.size());
59  op_end_times_run_[op_id] = timer_.MilliSeconds();
60 }
61 
62 void ProfDAGCounters::AddPerOpAsyncEndTime(size_t op_id) {
63  if (report_.num_runs_ <= 1) {
64  return;
65  }
66 
67  CAFFE_ENFORCE(op_id >= 0 && op_id < op_async_end_times_run_.size());
68  op_async_end_times_run_[op_id] = timer_.MilliSeconds();
69 }
70 
71 void ProfDAGCounters::ReportRunEnd() {
72  if (report_.num_runs_ <= 1) {
73  return;
74  }
75 
76  auto runtime = timer_.MilliSeconds();
77 
78  CaffeMap<std::string, float> cum_per_type_time_run;
79  CaffeMap<std::string, float> cum_per_type_invocations_run;
80  std::vector<float> per_op_time_run(report_.op_types_.size(), 0.0);
81  for (auto op_id = 0; op_id < report_.op_types_.size(); ++op_id) {
82  // check that we have valid times, otherwise return;
83  // times might not be valid if network execution ended prematurely
84  // because of operator errors
85  if (op_start_times_run_[op_id] < 0.0) {
86  return;
87  }
88 
89  float op_time = 0.0;
90  if (op_async_end_times_run_[op_id] > 0.0) {
91  op_time = op_async_end_times_run_[op_id] - op_start_times_run_[op_id];
92  } else {
93  if (op_end_times_run_[op_id] < 0.0) {
94  return;
95  }
96  op_time = op_end_times_run_[op_id] - op_start_times_run_[op_id];
97  }
98 
99  per_op_time_run[op_id] = op_time;
100 
101  const string& op_type = report_.op_types_[op_id];
102  cum_per_type_time_run[op_type] += op_time;
103  cum_per_type_invocations_run[op_type] += 1;
104  }
105 
106  // all operator times are valid, update report stats
107  report_.runtime_stats_ += ProfDAGStats(runtime);
108 
109  for (auto op_id = 0; op_id < report_.op_types_.size(); ++op_id) {
110  report_.time_per_op_total_[op_id] += ProfDAGStats(per_op_time_run[op_id]);
111  }
112 
113  for (const auto& kv : cum_per_type_time_run) {
114  report_.time_per_op_type_total_[kv.first] += ProfDAGStats(kv.second);
115  report_.times_per_run_per_type_total_[kv.first] +=
116  ProfDAGStats(cum_per_type_invocations_run[kv.first]);
117  }
118 }
119 
120 ProfDAGReport ProfDAGCounters::GetReport() const {
121  return report_;
122 }
123 
124 bool ProfDAGReport::hasStats() const {
125  return runtime_stats_.cnt() > 0;
126 }
127 
128 ProfDAGProto ProfDAGReport::statsProto(
129  const std::string& name,
130  const ProfDAGStats& stats,
131  const std::vector<std::string>& op_extra_info) const {
132  ProfDAGProto stats_proto;
133  const auto& moments = stats.computeMoments();
134  stats_proto.set_mean(moments.first);
135  stats_proto.set_stddev(moments.second);
136  stats_proto.set_name(name);
137  for (auto& extra_info : op_extra_info) {
138  stats_proto.add_extra_info(extra_info);
139  }
140  return stats_proto;
141 }
142 
143 ProfDAGProtos ProfDAGReport::GetOperatorStats() const {
144  ProfDAGProtos prof_dag_protos;
145  prof_dag_protos.set_net_name(net_name_);
146  if (hasStats()) {
147  for (auto& item : time_per_op_type_total_) {
148  auto buf = prof_dag_protos.add_stats();
149  buf->CopyFrom(statsProto(item.first, item.second, vector<std::string>()));
150  }
151  }
152  return prof_dag_protos;
153 }
154 
155 ProfDAGProtos ProfDAGReport::GetPerOperatorCost() const {
156  ProfDAGProtos prof_dag_protos;
157  prof_dag_protos.set_net_name(net_name_);
158  if (hasStats()) {
159  for (int op_id = 0; op_id < op_types_.size(); op_id++) {
160  const string& op_type = op_types_[op_id];
161  auto buf = prof_dag_protos.add_stats();
162  std::string op_output_name =
163  net_name_ + "___" + to_string(op_id) + "___" + op_type;
164  buf->CopyFrom(statsProto(
165  op_output_name, time_per_op_total_[op_id], op_extra_info_[op_id]));
166  }
167  }
168  return prof_dag_protos;
169 }
170 
171 void ProfDAGReport::PrintStats() {
172  if (!hasStats()) {
173  LOG(INFO) << "Insufficient number of runs";
174  return;
175  }
176 
177  std::ostringstream debug_out;
178  debug_out << "Measured operators over " << runtime_stats_.cnt()
179  << " net runs (" << net_name_ << "), #ops: " << op_types_.size()
180  << std::endl;
181 
182  debug_out << "Mean time in operator type per run (stddev):" << std::endl;
183  for (const auto& item : time_per_op_type_total_) {
184  const auto& moments = item.second.computeMoments();
185  const auto& times_moments =
186  times_per_run_per_type_total_[item.first].computeMoments();
187  debug_out << std::setw(10) << std::setfill(' ') << moments.first
188  << " ms/run (" << std::setw(10) << std::setfill(' ')
189  << moments.second << " ms/run) "
190  << " Op count per run: " << times_moments.first << " "
191  << item.first << std::endl;
192  }
193  const auto& runtime_moments = runtime_stats_.computeMoments();
194  debug_out << net_name_ << " runtime: " << runtime_moments.first << " ms ("
195  << runtime_moments.second << " ms)" << std::endl;
196 
197  LOG(INFO) << debug_out.str();
198 }
199 
200 ProfDAGReport& ProfDAGReport::operator+=(const ProfDAGReport& rhs) {
201  // Verify nets are compatible for addition
202  CAFFE_ENFORCE_EQ(
203  net_name_, rhs.net_name_, "Incompatible nets to add counters");
204  CAFFE_ENFORCE_EQ(
205  op_types_.size(),
206  rhs.op_types_.size(),
207  "Incompatible nets to add counters");
208  for (auto idx = 0; idx < op_types_.size(); ++idx) {
209  CAFFE_ENFORCE_EQ(
210  op_types_[idx],
211  rhs.op_types_[idx],
212  "Incompatible nets to add counters");
213  }
214 
215  if (!rhs.hasStats()) {
216  // rhs does not have valid profiling results, do nothing
217  return *this;
218  } else if (!hasStats()) {
219  // "this" does not have valid profiling results, but rhs does. copy rhs
220  time_per_op_total_ = rhs.time_per_op_total_;
221  time_per_op_type_total_ = rhs.time_per_op_type_total_;
222  times_per_run_per_type_total_ = rhs.times_per_run_per_type_total_;
223  runtime_stats_ = rhs.runtime_stats_;
224  num_runs_ = rhs.num_runs_;
225  return *this;
226  }
227 
228  // Do the addition
229  for (auto idx = 0; idx < time_per_op_total_.size(); ++idx) {
230  time_per_op_total_[idx] += rhs.time_per_op_total_.at(idx);
231  }
232  for (auto& item : time_per_op_type_total_) {
233  item.second += rhs.time_per_op_type_total_.at(item.first);
234  }
235  for (auto& item : times_per_run_per_type_total_) {
236  item.second += rhs.times_per_run_per_type_total_.at(item.first);
237  }
238  runtime_stats_ += rhs.runtime_stats_;
239  num_runs_ += rhs.num_runs_;
240 
241  return *this;
242 }
243 
244 } // namespace caffe2
void Start()
Starts a timer.
Definition: timer.h:24
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Definition: blob.h:13
float MilliSeconds()
Returns the elapsed time in milliseconds.
Definition: timer.h:32