Caffe2 - C++ API
A deep learning, cross-platform ML framework
net_simple.cc
1 
17 #include "caffe2/core/net_simple.h"
18 #include "caffe2/core/net.h"
19 
20 #include <set>
21 #include <unordered_map>
22 #include <unordered_set>
23 
24 #include "caffe2/core/operator.h"
25 #include "caffe2/core/static_tracepoint.h"
26 #include "caffe2/core/timer.h"
27 #include "caffe2/proto/caffe2.pb.h"
28 #include "caffe2/utils/proto_utils.h"
29 
30 namespace caffe2 {
31 
32 SimpleNet::SimpleNet(
33  const std::shared_ptr<const NetDef>& net_def,
34  Workspace* ws)
35  : NetBase(net_def, ws) {
36  VLOG(1) << "Constructing SimpleNet " << net_def->name();
37  const bool net_def_has_device_option = net_def->has_device_option();
38  // Initialize the operators
39  for (int idx = 0; idx < net_def->op_size(); ++idx) {
40  const auto& operator_def = net_def->op(idx);
41  VLOG(1) << "Creating operator " << operator_def.name() << ": "
42  << operator_def.type();
43  std::unique_ptr<OperatorBase> op{nullptr};
44  if (!operator_def.has_device_option() && net_def_has_device_option) {
45  // In the case that the operator def does not specify a device option but
46  // the net def has a default option, we copy the device option over to the
47  // operator def.
48  OperatorDef temp_def(operator_def);
49  temp_def.mutable_device_option()->CopyFrom(net_def->device_option());
50  op = CreateOperator(temp_def, ws, idx);
51  } else {
52  op = CreateOperator(operator_def, ws, idx);
53  op->set_debug_def(
54  std::shared_ptr<const OperatorDef>{net_def, &(net_def->op(idx))});
55  }
56  operators_.emplace_back(std::move(op));
57  }
58 }
59 
60 bool SimpleNet::Run() {
61  StartAllObservers();
62  VLOG(1) << "Running net " << name_;
63  for (auto& op : operators_) {
64  VLOG(1) << "Running operator " << op->debug_def().name() << "("
65  << op->debug_def().type() << ").";
66 #ifdef CAFFE2_ENABLE_SDT
67  const auto& op_name = op->debug_def().name().c_str();
68  const auto& op_type = op->debug_def().type().c_str();
69  auto* op_ptr = op.get();
70  const auto& net_name = name_.c_str();
71  CAFFE_SDT(operator_start, net_name, op_name, op_type, op_ptr);
72 #endif
73  bool res = op->Run();
74 #ifdef CAFFE2_ENABLE_SDT
75  CAFFE_SDT(operator_done, net_name, op_name, op_type, op_ptr);
76 #endif
77  if (!res) {
78  LOG(ERROR) << "Operator failed: " << ProtoDebugString(op->debug_def());
79  return false;
80  }
81  }
82  StopAllObservers();
83  return true;
84 }
85 
86 bool SimpleNet::RunAsync() {
87  return Run();
88 }
89 
namespace {
// Comparator that orders pairs by descending `second`; `first` is ignored.
// Returns true when x.second is strictly greater than y.second.
template <typename Key, typename Value>
bool PairLargerThan(
    const std::pair<Key, Value>& x,
    const std::pair<Key, Value>& y) {
  const Value& lhs_value = x.second;
  const Value& rhs_value = y.second;
  return lhs_value > rhs_value;
}
} // namespace
96 
98  const int warmup_runs,
99  const int main_runs,
100  const bool run_individual) {
101  LOG(INFO) << "Starting benchmark.";
102  LOG(INFO) << "Running warmup runs.";
103  CAFFE_ENFORCE(
104  warmup_runs >= 0,
105  "Number of warm up runs should be non negative, provided ",
106  warmup_runs,
107  ".");
108  for (int i = 0; i < warmup_runs; ++i) {
109  CAFFE_ENFORCE(Run(), "Warmup run ", i, " has failed.");
110  }
111 
112  LOG(INFO) << "Main runs.";
113  CAFFE_ENFORCE(
114  main_runs >= 0,
115  "Number of main runs should be non negative, provided ",
116  main_runs,
117  ".");
118  Timer timer;
119  for (int i = 0; i < main_runs; ++i) {
120  CAFFE_ENFORCE(Run(), "Main run ", i, " has failed.");
121  }
122  auto millis = timer.MilliSeconds();
123  LOG(INFO) << "Main run finished. Milliseconds per iter: "
124  << millis / main_runs
125  << ". Iters per second: " << 1000.0 * main_runs / millis;
126 
127  vector<float> time_per_op(operators_.size(), 0);
128  vector<uint64_t> flops_per_op;
129  vector<uint64_t> memory_bytes_per_op;
130  vector<uint64_t> param_bytes_per_op;
131  CaffeMap<string, float> time_per_op_type;
132  CaffeMap<string, float> flops_per_op_type;
133  CaffeMap<string, float> memory_bytes_per_op_type;
134  CaffeMap<string, float> param_bytes_per_op_type;
135  if (run_individual) {
136  for (int i = 0; i < main_runs; ++i) {
137  for (auto& op : operators_) {
138  op->ResetEvent();
139  }
140  int idx = 0;
141  for (auto& op : operators_) {
142  const string& op_type = op->debug_def().type();
143  if (i == 0) { // Gather flops on the first run.
144  auto* schema = OpSchemaRegistry::Schema(op_type);
145  if (schema && schema->HasCostInferenceFunction()) {
146  vector<TensorShape> shapes = op->InputTensorShapes();
147 
148  OpSchema::Cost cost = schema->InferCost(op->debug_def(), shapes);
149 
150  flops_per_op.emplace_back(cost.flops);
151  memory_bytes_per_op.emplace_back(cost.bytes_moved);
152  param_bytes_per_op.emplace_back(cost.params_bytes);
153 
154  flops_per_op_type[op_type] += cost.flops;
155  memory_bytes_per_op_type[op_type] += cost.bytes_moved;
156  param_bytes_per_op_type[op_type] += cost.params_bytes;
157  }
158  }
159  timer.Start();
160  CAFFE_ENFORCE(
161  op->Run(),
162  "operator ",
163  op->debug_def().name(),
164  "(",
165  op_type,
166  ") has failed.");
167  float spent = timer.MilliSeconds();
168  time_per_op[idx] += spent;
169  time_per_op_type[op_type] += spent;
170  ++idx;
171  }
172  }
173  int idx = 0;
174  for (auto& op : operators_) {
175  const string& op_type = op->debug_def().type();
176  const string& print_name =
177  (op->debug_def().name().size()
178  ? op->debug_def().name()
179  : (op->debug_def().output_size() ? op->debug_def().output(0)
180  : "NO_OUTPUT"));
181  std::stringstream flops_str;
182  if (idx < flops_per_op.size() && flops_per_op[idx]) {
183  flops_str << " (" << to_string(1.0e-9 * flops_per_op[idx]) << " GFLOP, "
184  << to_string(1.0e-6 * flops_per_op[idx] / time_per_op[idx])
185  << " GFLOPS)";
186  }
187  std::stringstream memory_bytes_str;
188  if (idx < memory_bytes_per_op.size() && memory_bytes_per_op[idx]) {
189  memory_bytes_str << " (" << to_string(1.0e-6 * memory_bytes_per_op[idx])
190  << " MB)";
191  }
192  std::stringstream param_bytes_str;
193  if (idx < param_bytes_per_op.size() && param_bytes_per_op[idx]) {
194  memory_bytes_str << " (" << to_string(1.0e-6 * param_bytes_per_op[idx])
195  << " MB)";
196  }
197  LOG(INFO) << "Operator #" << idx << " (" << print_name << ", " << op_type
198  << ") " << time_per_op[idx] / main_runs << " ms/iter"
199  << flops_str.str() << memory_bytes_str.str()
200  << param_bytes_str.str();
201  ++idx;
202  }
203  const std::vector<string> metric(
204  {"Time", "FLOP", "Feature Memory", "Parameter Memory"});
205  const std::vector<double> normalizer(
206  {1.0 / main_runs, 1.0e-9, 1.0e-6, 1.0e-6});
207  const std::vector<string> unit({"ms", "GFLOP", "MB", "MB"});
208 
209  std::vector<CaffeMap<string, float>*> metric_per_op_type_vec_vec;
210  metric_per_op_type_vec_vec.emplace_back(&time_per_op_type);
211  metric_per_op_type_vec_vec.emplace_back(&flops_per_op_type);
212  metric_per_op_type_vec_vec.emplace_back(&memory_bytes_per_op_type);
213  metric_per_op_type_vec_vec.emplace_back(&param_bytes_per_op_type);
214  for (int i = 0; i < metric_per_op_type_vec_vec.size(); ++i) {
215  LOG(INFO) << metric[i] << " per operator type:";
216  auto* item = metric_per_op_type_vec_vec[i];
217  std::vector<std::pair<string, float>> metric_per_op_type_vec(
218  (*item).begin(), (*item).end());
219  std::sort(
220  metric_per_op_type_vec.begin(),
221  metric_per_op_type_vec.end(),
222  PairLargerThan<string, float>);
223  float total_metric = 0.;
224  for (const auto& op_item : metric_per_op_type_vec) {
225  total_metric += op_item.second * normalizer[i];
226  }
227  for (const auto& op_item : metric_per_op_type_vec) {
228  float percent = 0.;
229  if (total_metric > 0.) {
230  percent = (100.0 * op_item.second * normalizer[i] / total_metric);
231  }
232  LOG(INFO) << std::setw(15) << std::setfill(' ')
233  << op_item.second * normalizer[i] << " " << unit[i] << ". "
234  << std::setw(10) << std::setfill(' ') << percent << "%. "
235  << op_item.first;
236  }
237  LOG(INFO) << std::setw(15) << std::setfill(' ') << total_metric << " "
238  << unit[i] << " in Total";
239  }
240  }
241  // We will reuse time_per_op to return the result of BenchmarkNet.
242  for (int i = 0; i < time_per_op.size(); ++i) {
243  time_per_op[i] /= main_runs;
244  }
245  time_per_op.insert(time_per_op.begin(), millis / main_runs);
246  return time_per_op;
247 }
248 
// NOTE(review): presumably makes SimpleNet selectable as the net type
// "simple" through the net registry - confirm against REGISTER_NET's
// definition, which is not visible in this file.
249 REGISTER_NET(simple, SimpleNet);
250 
251 } // namespace caffe2
void Start()
Starts a timer.
Definition: timer.h:40
Copyright (c) 2016-present, Facebook, Inc.
float MilliSeconds()
Returns the elapsed time in milliseconds.
Definition: timer.h:48
vector< float > TEST_Benchmark(const int warmup_runs, const int main_runs, const bool run_individual) override
Benchmarks a network.
Definition: net_simple.cc:97
A simple timer object for measuring time.
Definition: timer.h:32