1 #include "caffe2/core/prof_dag_counters.h" 2 #include "caffe2/utils/string_utils.h" 9 ProfDAGCounters::ProfDAGCounters(
const std::shared_ptr<const NetDef>& net_def) {
10 report_.net_name_ = net_def->name();
11 report_.num_runs_ = 0;
12 auto num_ops = net_def->op_size();
13 report_.op_types_.reserve(num_ops);
14 report_.op_extra_info_.reserve(num_ops);
16 for (
auto op_id = 0; op_id < num_ops; ++op_id) {
17 report_.op_types_.push_back(net_def->op(op_id).type());
18 vector<std::string> op_extra_info;
19 if (net_def->op(op_id).has_device_option() &&
20 net_def->op(op_id).device_option().extra_info_size() > 0) {
21 for (
auto i = 0; i < net_def->op(op_id).device_option().extra_info_size();
23 auto extra_info_str = net_def->op(op_id).device_option().extra_info(i);
24 op_extra_info.push_back(extra_info_str);
27 report_.op_extra_info_.push_back(op_extra_info);
29 report_.time_per_op_total_.resize(num_ops);
32 void ProfDAGCounters::ReportRunStart() {
33 report_.num_runs_ += 1;
35 auto num_ops = report_.op_types_.size();
36 op_start_times_run_.clear();
37 op_start_times_run_.resize(num_ops, -1.0);
38 op_end_times_run_.clear();
39 op_end_times_run_.resize(num_ops, -1.0);
40 op_async_end_times_run_.clear();
41 op_async_end_times_run_.resize(num_ops, -1.0);
44 void ProfDAGCounters::AddPerOpStartTime(
size_t op_id) {
45 if (report_.num_runs_ <= 1) {
49 CAFFE_ENFORCE(op_id >= 0 && op_id < op_start_times_run_.size());
53 void ProfDAGCounters::AddPerOpEndTime(
size_t op_id) {
54 if (report_.num_runs_ <= 1) {
58 CAFFE_ENFORCE(op_id >= 0 && op_id < op_end_times_run_.size());
62 void ProfDAGCounters::AddPerOpAsyncEndTime(
size_t op_id) {
63 if (report_.num_runs_ <= 1) {
67 CAFFE_ENFORCE(op_id >= 0 && op_id < op_async_end_times_run_.size());
71 void ProfDAGCounters::ReportRunEnd() {
72 if (report_.num_runs_ <= 1) {
78 CaffeMap<std::string, float> cum_per_type_time_run;
79 CaffeMap<std::string, float> cum_per_type_invocations_run;
80 std::vector<float> per_op_time_run(report_.op_types_.size(), 0.0);
81 for (
auto op_id = 0; op_id < report_.op_types_.size(); ++op_id) {
85 if (op_start_times_run_[op_id] < 0.0) {
90 if (op_async_end_times_run_[op_id] > 0.0) {
91 op_time = op_async_end_times_run_[op_id] - op_start_times_run_[op_id];
93 if (op_end_times_run_[op_id] < 0.0) {
96 op_time = op_end_times_run_[op_id] - op_start_times_run_[op_id];
99 per_op_time_run[op_id] = op_time;
101 const string& op_type = report_.op_types_[op_id];
102 cum_per_type_time_run[op_type] += op_time;
103 cum_per_type_invocations_run[op_type] += 1;
107 report_.runtime_stats_ += ProfDAGStats(runtime);
109 for (
auto op_id = 0; op_id < report_.op_types_.size(); ++op_id) {
110 report_.time_per_op_total_[op_id] += ProfDAGStats(per_op_time_run[op_id]);
113 for (
const auto& kv : cum_per_type_time_run) {
114 report_.time_per_op_type_total_[kv.first] += ProfDAGStats(kv.second);
115 report_.times_per_run_per_type_total_[kv.first] +=
116 ProfDAGStats(cum_per_type_invocations_run[kv.first]);
120 ProfDAGReport ProfDAGCounters::GetReport()
const {
124 bool ProfDAGReport::hasStats()
const {
125 return runtime_stats_.cnt() > 0;
128 ProfDAGProto ProfDAGReport::statsProto(
129 const std::string& name,
130 const ProfDAGStats& stats,
131 const std::vector<std::string>& op_extra_info)
const {
132 ProfDAGProto stats_proto;
133 const auto& moments = stats.computeMoments();
134 stats_proto.set_mean(moments.first);
135 stats_proto.set_stddev(moments.second);
136 stats_proto.set_name(name);
137 for (
auto& extra_info : op_extra_info) {
138 stats_proto.add_extra_info(extra_info);
143 ProfDAGProtos ProfDAGReport::GetOperatorStats()
const {
144 ProfDAGProtos prof_dag_protos;
145 prof_dag_protos.set_net_name(net_name_);
147 for (
auto& item : time_per_op_type_total_) {
148 auto buf = prof_dag_protos.add_stats();
149 buf->CopyFrom(statsProto(item.first, item.second, vector<std::string>()));
152 return prof_dag_protos;
155 ProfDAGProtos ProfDAGReport::GetPerOperatorCost()
const {
156 ProfDAGProtos prof_dag_protos;
157 prof_dag_protos.set_net_name(net_name_);
159 for (
int op_id = 0; op_id < op_types_.size(); op_id++) {
160 const string& op_type = op_types_[op_id];
161 auto buf = prof_dag_protos.add_stats();
162 std::string op_output_name =
163 net_name_ +
"___" + to_string(op_id) +
"___" + op_type;
164 buf->CopyFrom(statsProto(
165 op_output_name, time_per_op_total_[op_id], op_extra_info_[op_id]));
168 return prof_dag_protos;
171 void ProfDAGReport::PrintStats() {
173 LOG(INFO) <<
"Insufficient number of runs";
177 std::ostringstream debug_out;
178 debug_out <<
"Measured operators over " << runtime_stats_.cnt()
179 <<
" net runs (" << net_name_ <<
"), #ops: " << op_types_.size()
182 debug_out <<
"Mean time in operator type per run (stddev):" << std::endl;
183 for (
const auto& item : time_per_op_type_total_) {
184 const auto& moments = item.second.computeMoments();
185 const auto& times_moments =
186 times_per_run_per_type_total_[item.first].computeMoments();
187 debug_out << std::setw(10) << std::setfill(
' ') << moments.first
188 <<
" ms/run (" << std::setw(10) << std::setfill(
' ')
189 << moments.second <<
" ms/run) " 190 <<
" Op count per run: " << times_moments.first <<
" " 191 << item.first << std::endl;
193 const auto& runtime_moments = runtime_stats_.computeMoments();
194 debug_out << net_name_ <<
" runtime: " << runtime_moments.first <<
" ms (" 195 << runtime_moments.second <<
" ms)" << std::endl;
197 LOG(INFO) << debug_out.str();
200 ProfDAGReport& ProfDAGReport::operator+=(
const ProfDAGReport& rhs) {
203 net_name_, rhs.net_name_,
"Incompatible nets to add counters");
206 rhs.op_types_.size(),
207 "Incompatible nets to add counters");
208 for (
auto idx = 0; idx < op_types_.size(); ++idx) {
212 "Incompatible nets to add counters");
215 if (!rhs.hasStats()) {
218 }
else if (!hasStats()) {
220 time_per_op_total_ = rhs.time_per_op_total_;
221 time_per_op_type_total_ = rhs.time_per_op_type_total_;
222 times_per_run_per_type_total_ = rhs.times_per_run_per_type_total_;
223 runtime_stats_ = rhs.runtime_stats_;
224 num_runs_ = rhs.num_runs_;
229 for (
auto idx = 0; idx < time_per_op_total_.size(); ++idx) {
230 time_per_op_total_[idx] += rhs.time_per_op_total_.at(idx);
232 for (
auto& item : time_per_op_type_total_) {
233 item.second += rhs.time_per_op_type_total_.at(item.first);
235 for (
auto& item : times_per_run_per_type_total_) {
236 item.second += rhs.times_per_run_per_type_total_.at(item.first);
238 runtime_stats_ += rhs.runtime_stats_;
239 num_runs_ += rhs.num_runs_;
void Start()
Starts a timer.
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
float MilliSeconds()
Returns the elapsed time in milliseconds.