Caffe2 - C++ API
A deep learning, cross-platform ML framework
perf_observer.cc
1 #include "observers/perf_observer.h"
2 #include "observers/observer_config.h"
3 #ifndef C10_MOBILE
4 #include "caffe2/core/flags.h"
5 #include "observers/net_observer_reporter_print.h"
6 #endif
7 
8 #include <random>
9 #include "caffe2/core/common.h"
10 #include "caffe2/core/init.h"
11 #include "caffe2/core/operator.h"
12 
13 #ifndef C10_MOBILE
14 C10_DEFINE_int64(
15  aiBench_netInitSampleRate,
16  0,
17  "One in N sampling rate for net delay");
18 
19 C10_DEFINE_int64(
20  aiBench_netFollowupSampleRate,
21  0,
22  "One in N sampling rate for net delay");
23 
24 C10_DEFINE_int64(
25  aiBench_netFollowupSampleCount,
26  0,
27  "control the following c logs");
28 
29 C10_DEFINE_int64(
30  aiBench_operatorNetSampleRatio,
31  0,
32  "One in N sampling rate for operator delay");
33 
34 C10_DEFINE_int64(
35  aiBench_skipIters,
36  0,
37  "skip the first N iterations of the net run");
38 #endif
39 
40 namespace caffe2 {
41 namespace {
42 
43 bool registerGlobalPerfNetObserverCreator(int* /*pargc*/, char*** /*pargv*/) {
44  AddGlobalNetObserverCreator([](NetBase* subject) {
45  return caffe2::make_unique<PerfNetObserver>(subject);
46  });
47 
48 #if !defined(C10_MOBILE)
49  // for aibench usage
50  caffe2::ObserverConfig::setReporter(
51  caffe2::make_unique<caffe2::NetObserverReporterPrint>());
52 
53  caffe2::ObserverConfig::initSampleRate(
54  FLAGS_aiBench_netInitSampleRate,
55  FLAGS_aiBench_netFollowupSampleRate,
56  FLAGS_aiBench_netFollowupSampleCount,
57  FLAGS_aiBench_operatorNetSampleRatio,
58  FLAGS_aiBench_skipIters);
59 #endif
60 
61  return true;
62 }
63 } // namespace
64 
65 REGISTER_CAFFE2_EARLY_INIT_FUNCTION(
66  registerGlobalPerfNetObserverCreator,
67  &registerGlobalPerfNetObserverCreator,
68  "Caffe2 net global observer creator");
69 
70 PerfNetObserver::PerfNetObserver(NetBase* subject_)
71  : NetObserver(subject_), numRuns_(0) {}
72 
73 PerfNetObserver::~PerfNetObserver() {}
74 
75 void PerfNetObserver::Start() {
76  static int visitCount = 0;
77  // Select whether to log the operator or the net.
78  // We have one sample rate for the entire app.
79  int netInitSampleRate = ObserverConfig::getNetInitSampleRate();
80  int netFollowupSampleRate = ObserverConfig::getNetFollowupSampleRate();
81  int netFollowupSampleCount = ObserverConfig::getNetFollowupSampleCount();
82  int operatorNetSampleRatio = ObserverConfig::getOpoeratorNetSampleRatio();
83  int skipIters = ObserverConfig::getSkipIters();
84  int sampleRate = visitCount > 0 ? netFollowupSampleRate : netInitSampleRate;
85  if (skipIters <= numRuns_ && sampleRate > 0 && rand() % sampleRate == 0) {
86  visitCount++;
87  if (visitCount == netFollowupSampleCount) {
88  visitCount = 0;
89  }
90  if (operatorNetSampleRatio > 0 && rand() % operatorNetSampleRatio == 0) {
91  logType_ = PerfNetObserver::OPERATOR_DELAY;
92  } else {
93  logType_ = PerfNetObserver::NET_DELAY;
94  }
95  } else {
96  logType_ = PerfNetObserver::NONE;
97  }
98  numRuns_++;
99 
100  if (logType_ == PerfNetObserver::OPERATOR_DELAY) {
101  /* Always recreate new operator observers
102  whenever we measure operator delay */
103  const auto& operators = subject_->GetOperators();
104  for (auto* op : operators) {
105  observerMap_[op] = op->AttachObserver(
106  caffe2::make_unique<PerfOperatorObserver>(op, this));
107  }
108  }
109 
110  if (logType_ != PerfNetObserver::NONE) {
111  /* Only start timer when we need to */
112  timer_.Start();
113  }
114 }
115 
116 void PerfNetObserver::Stop() {
117  if (logType_ == PerfNetObserver::NONE) {
118  return;
119  }
120  auto currentRunTime = timer_.MilliSeconds();
121  std::map<std::string, PerformanceInformation> info;
122  PerformanceInformation net_perf;
123  net_perf.latency = currentRunTime;
124  if (logType_ == PerfNetObserver::OPERATOR_DELAY) {
125  const auto& operators = subject_->GetOperators();
126  for (int idx = 0; idx < operators.size(); ++idx) {
127  const auto* op = operators[idx];
128  auto name = getObserverName(op, idx);
129  PerformanceInformation p;
130 
131  p.latency = static_cast<const PerfOperatorObserver*>(observerMap_[op])
132  ->getMilliseconds();
133 
134  p.engine = op->engine();
135  p.type = op->type();
136  p.tensor_shapes =
137  static_cast<const PerfOperatorObserver*>(observerMap_[op])
138  ->getTensorShapes();
139 
140  if (op->has_debug_def()) {
141  for (auto arg : op->debug_def().arg()) {
142  p.args.emplace_back(arg);
143  }
144  }
145 
146  info.insert({name, p});
147  }
148 
149  /* clear all operator delay after use so that we don't spent time
150  collecting the operator delay info in later runs */
151  for (auto* op : operators) {
152  op->DetachObserver(observerMap_[op]);
153  }
154  observerMap_.clear();
155  }
156  info.insert({"NET_DELAY", net_perf});
157  ObserverConfig::getReporter()->report(subject_, info);
158 }
159 
160 caffe2::string PerfNetObserver::getObserverName(const OperatorBase* op, int idx)
161  const {
162  string opType = op->has_debug_def() ? op->debug_def().type() : "NO_TYPE";
163  string displayName =
164  (op->has_debug_def() ? op->debug_def().name().size()
165  ? op->debug_def().name()
166  : (op->debug_def().output_size() ? op->debug_def().output(0)
167  : "NO_OUTPUT")
168  : "NO_DEF");
169  caffe2::string name =
170  "ID_" + c10::to_string(idx) + "_" + opType + "_" + displayName;
171  return name;
172 }
173 
174 PerfOperatorObserver::PerfOperatorObserver(
175  OperatorBase* op,
176  PerfNetObserver* netObserver)
177  : ObserverBase<OperatorBase>(op),
178  netObserver_(netObserver),
179  milliseconds_(0) {
180  CAFFE_ENFORCE(netObserver_, "Observers can't operate outside of the net");
181 }
182 
183 PerfOperatorObserver::~PerfOperatorObserver() {}
184 
185 void PerfOperatorObserver::Start() {
186  /* Get the time from the start of the net minus the time spent
187  in previous invocations. It is the time spent on other operators.
188  This way, when the operator finishes, the time from the start of the net
189  minus the time spent in all other operators is the total time on this
190  operator. This is done to avoid saving a timer in each operator */
191  milliseconds_ = netObserver_->getTimer().MilliSeconds() - milliseconds_;
192 }
193 
194 void PerfOperatorObserver::Stop() {
195  /* Time from the start of the net minus the time spent on all other
196  operators is the time spent on this operator */
197  milliseconds_ = netObserver_->getTimer().MilliSeconds() - milliseconds_;
198  tensor_shapes_ = subject_->InputTensorShapes();
199 }
200 
201 double PerfOperatorObserver::getMilliseconds() const {
202  return milliseconds_;
203 }
204 
205 std::vector<TensorShape> PerfOperatorObserver::getTensorShapes() const {
206  return tensor_shapes_;
207 }
208 
209 } // namespace caffe2
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Definition: blob.h:13