Caffe2 - C++ API
A deep learning, cross-platform ML framework
stats_ops.cc
1 
17 #include <chrono>
18 #include <vector>
19 #include "caffe2/core/operator.h"
20 #include "caffe2/core/stats.h"
21 #include "caffe2/core/tensor.h"
22 
23 namespace caffe2 {
24 
25 class StatRegistryCreateOp : public Operator<CPUContext> {
26  public:
27  StatRegistryCreateOp(const OperatorDef& operator_def, Workspace* ws)
28  : Operator(operator_def, ws) {}
29 
30  bool RunOnDevice() override {
31  *OperatorBase::Output<std::unique_ptr<StatRegistry>>(0) =
32  std::unique_ptr<StatRegistry>(new StatRegistry);
33  return true;
34  }
35 };
36 
37 class StatRegistryExportOp : public Operator<CPUContext> {
38  public:
39  StatRegistryExportOp(const OperatorDef& operator_def, Workspace* ws)
40  : Operator(operator_def, ws),
41  reset_(GetSingleArgument<bool>("reset", true)) {}
42 
43  bool RunOnDevice() override {
44  auto registry = InputSize() > 0
45  ? OperatorBase::Input<std::unique_ptr<StatRegistry>>(0).get()
46  : &StatRegistry::get();
47  auto* keys = Output(0);
48  auto* values = Output(1);
49  auto* timestamps = Output(2);
50  auto data = registry->publish(reset_);
51  keys->Resize(data.size());
52  values->Resize(data.size());
53  timestamps->Resize(data.size());
54  auto* pkeys = keys->mutable_data<std::string>();
55  auto* pvals = values->mutable_data<int64_t>();
56  auto* ptimestamps = timestamps->mutable_data<int64_t>();
57  int i = 0;
58  for (const auto& stat : data) {
59  pkeys[i] = std::move(stat.key);
60  pvals[i] = stat.value;
61  ptimestamps[i] =
62  std::chrono::nanoseconds(stat.ts.time_since_epoch()).count();
63  ++i;
64  }
65  return true;
66  }
67 
68  private:
69  bool reset_;
70 };
71 
72 class StatRegistryUpdateOp : public Operator<CPUContext> {
73  public:
74  StatRegistryUpdateOp(const OperatorDef& operator_def, Workspace* ws)
75  : Operator(operator_def, ws) {}
76 
77  bool RunOnDevice() override {
78  const auto& keys = Input(0);
79  const auto& values = Input(1);
80  auto registry = InputSize() == 3
81  ? OperatorBase::Input<std::unique_ptr<StatRegistry>>(2).get()
82  : &StatRegistry::get();
83  CAFFE_ENFORCE_EQ(keys.size(), values.size());
84  ExportedStatList data(keys.size());
85  auto* pkeys = keys.data<std::string>();
86  auto* pvals = values.data<int64_t>();
87  int i = 0;
88  for (auto& stat : data) {
89  stat.key = pkeys[i];
90  stat.value = pvals[i];
91  ++i;
92  }
93  registry->update(data);
94  return true;
95  }
96 };
97 
99  public:
100  explicit TimerInstance(const std::string& name)
101  : running_(false), stat_(name) {}
102 
103  void begin() {
104  CAFFE_ENFORCE(!running_, "Called TimerBegin on an already running timer.");
105  running_ = true;
106  start_ = std::chrono::high_resolution_clock::now();
107  }
108 
109  void end() {
110  CAFFE_ENFORCE(running_, "Called TimerEnd on a stopped timer.");
111  using namespace std::chrono;
112  auto duration = high_resolution_clock::now() - start_;
113  auto nanos = duration_cast<nanoseconds>(duration).count();
114  CAFFE_EVENT(stat_, time_ns, nanos);
115  running_ = false;
116  }
117 
118  int64_t get_ns() {
119  CAFFE_ENFORCE(running_, "Called TimerGet on a stopped timer.");
120  using namespace std::chrono;
121  auto duration = high_resolution_clock::now() - start_;
122  auto nanos = duration_cast<nanoseconds>(duration).count();
123  return nanos;
124  }
125 
126  private:
127  bool running_;
128  std::chrono::high_resolution_clock::time_point start_;
129 
130  struct TimerStat {
131  CAFFE_STAT_CTOR(TimerStat);
132  CAFFE_AVG_EXPORTED_STAT(time_ns);
133  } stat_;
134 };
135 
136 struct TimerBeginOp : public Operator<CPUContext> {
137  TimerBeginOp(const OperatorDef& operator_def, Workspace* ws)
138  : Operator(operator_def, ws),
139  given_name_(GetSingleArgument<std::string>(
140  "counter_name",
141  operator_def.output().Get(0))),
142  timer_([this]() { return given_name_; }()) {}
143 
144  bool RunOnDevice() override {
145  *OperatorBase::Output<TimerInstance*>(0) = &timer_;
146  timer_.begin();
147  return true;
148  }
149 
150  private:
151  const std::string given_name_;
152  TimerInstance timer_;
153 };
154 
155 struct TimerEndOp : public Operator<CPUContext> {
156  TimerEndOp(const OperatorDef& operator_def, Workspace* ws)
157  : Operator(operator_def, ws) {}
158 
159  bool RunOnDevice() override {
160  OperatorBase::Input<TimerInstance*>(0)->end();
161  return true;
162  }
163 };
164 
165 struct TimerGetAndEndOp : public Operator<CPUContext> {
166  TimerGetAndEndOp(const OperatorDef& operator_def, Workspace* ws)
167  : Operator(operator_def, ws) {}
168 
169  bool RunOnDevice() override {
170  int64_t nanos = OperatorBase::Input<TimerInstance*>(0)->get_ns();
171  OperatorBase::Input<TimerInstance*>(0)->end();
172  auto* res = OperatorBase::Output<TensorCPU>(0);
173  res->Resize(1);
174  res->template mutable_data<int64_t>()[0] = nanos;
175  return true;
176  }
177 };
178 
179 struct TimerGetOp : public Operator<CPUContext> {
180  TimerGetOp(const OperatorDef& operator_def, Workspace* ws)
181  : Operator(operator_def, ws) {}
182 
183  bool RunOnDevice() override {
184  int64_t nanos = OperatorBase::Input<TimerInstance*>(0)->get_ns();
185  auto* res = OperatorBase::Output<TensorCPU>(0);
186  res->Resize();
187  res->template mutable_data<int64_t>()[0] = nanos;
188  return true;
189  }
190 };
191 
192 struct CpuUtilizationReportOp : public Operator<CPUContext> {
193  CpuUtilizationReportOp(const OperatorDef& operator_def, Workspace* ws)
194  : Operator(operator_def, ws),
195  statsName_(GetSingleArgument<std::string>("stats_name", "utilization")),
196  stat_([this]() { return statsName_; }()) {}
197 
198  bool RunOnDevice() override {
199  float utilization = Input(0).template data<float>()[0];
200  // Utilization is a float value, but CAFFE_EVENT only keeps int64_t values.
201  // We will keep 100x of the received utilization to maintain accuracy.
202  CAFFE_EVENT(stat_, cpu_utilization, (int)(utilization * 100));
203  return true;
204  }
205 
206  private:
207  std::string statsName_;
208  struct CpuStats {
209  CAFFE_STAT_CTOR(CpuStats);
210  CAFFE_EXPORTED_STAT(cpu_utilization);
211  } stat_;
212 };
213 
// Operator-name -> implementing-class bindings for the StatRegistry
// operators defined in this file.
214 REGISTER_CPU_OPERATOR(StatRegistryCreate, StatRegistryCreateOp);
215 REGISTER_CPU_OPERATOR(StatRegistryUpdate, StatRegistryUpdateOp);
216 REGISTER_CPU_OPERATOR(StatRegistryExport, StatRegistryExportOp);
217 
// Bindings for the timer operators and the CPU utilization reporter.
218 REGISTER_CPU_OPERATOR(TimerBegin, TimerBeginOp);
219 REGISTER_CPU_OPERATOR(TimerEnd, TimerEndOp);
220 REGISTER_CPU_OPERATOR(TimerGetAndEnd, TimerGetAndEndOp);
221 REGISTER_CPU_OPERATOR(TimerGet, TimerGetOp);
222 REGISTER_CPU_OPERATOR(CpuUtilizationReport, CpuUtilizationReportOp);
223 
// Schema for StatRegistryCreate: no inputs, one output (the handle blob that
// StatRegistryCreateOp fills with the new registry).
224 OPERATOR_SCHEMA(StatRegistryCreate)
225  .NumInputs(0)
226  .NumOutputs(1)
227  .SetDoc(R"DOC(
228 Create a StatRegistry object that will contain a map of performance counters
229 keyed by name. A StatRegistry is used to gather and retrieve performance
230 counts throughout the caffe2 codebase.
231 )DOC")
232  .Output(0, "handle", "A Blob pointing to the newly created StatRegistry.");
233 
// Schema for StatRegistryUpdate: two required inputs (keys, values), an
// optional third input (registry handle), and no outputs. This matches
// StatRegistryUpdateOp, which uses the handle only when exactly three inputs
// are present.
234 OPERATOR_SCHEMA(StatRegistryUpdate)
235  .NumInputs(2, 3)
236  .NumOutputs(0)
237  .SetDoc(R"DOC(
238 Update the given StatRegistry, or the global StatRegistry,
239 with the values of counters for the given keys.
240 )DOC")
241  .Input(0, "keys", "1D string tensor with the key names to update.")
242  .Input(1, "values", "1D int64 tensor with the values to update.")
243  .Input(
244  2,
245  "handle",
246  "If provided, update the given StatRegistry. "
247  "Otherwise, update the global singleton.");
248 
249 OPERATOR_SCHEMA(StatRegistryExport)
250  .NumInputs(0, 1)
251  .NumOutputs(3)
252  .Input(
253  0,
254  "handle",
255  "If provided, export values from given StatRegistry."
256  "Otherwise, export values from the global singleton StatRegistry.")
257  .Output(0, "keys", "1D string tensor with exported key names")
258  .Output(1, "values", "1D int64 tensor with exported values")
259  .Output(2, "timestamps", "The unix timestamp at counter retrieval.")
260  .Arg(
261  "reset",
262  "(default true) Whether to atomically reset the counters afterwards.");
263 
// Schema for TimerBegin: no inputs, one output carrying the timer pointer.
// The TimerEnd, TimerGet and TimerGetAndEnd schemas in this file all take
// that pointer as their input 0.
264 OPERATOR_SCHEMA(TimerBegin)
265  .NumInputs(0)
266  .NumOutputs(1)
267  .SetDoc(R"DOC(
268 Start a wallclock timer, returning a pointer to it.
269 The timer is stopped by calling TimerEnd)DOC")
270  .Arg("counter_name", "Name of the timer. If not provided, use output name.")
271  .Output(0, "timer", "Pointer to timer, to be passed to TimerEnd.");
272 
// Schema for TimerEnd: one input (the timer pointer), no outputs.
273 OPERATOR_SCHEMA(TimerEnd)
274  .NumInputs(1)
275  .NumOutputs(0)
276  .SetDoc("Stop a timer started with TimerBegin, publishing a CAFFE_EVENT")
277  .Input(0, "timer", "Pointer to timer, obtained from TimerBegin.");
278 
279 OPERATOR_SCHEMA(TimerGetAndEnd)
280  .NumInputs(1)
281  .NumOutputs(1)
282  .SetDoc(R"DOC(Queries the current time of a timer in nanos, stops the timer
283  publishing a CAFFE_EVENT)DOC")
284  .Input(0, "timer", "Pointer to timer, obtained from TimerBegin.")
285  .Output(0, "nanos", "nanoseconds in int64");
286 
// Schema for TimerGet: one input (the timer pointer) and one output (the
// elapsed time in nanoseconds). TimerGetOp reads the timer without stopping it.
287 OPERATOR_SCHEMA(TimerGet)
288  .NumInputs(1)
289  .NumOutputs(1)
290  .SetDoc(R"DOC(Queries the current time of a timer in nanos)DOC")
291  .Input(0, "timer", "Pointer to timer, obtained from TimerBegin.")
292  .Output(0, "nanos", "nanoseconds in int64");
293 
294 OPERATOR_SCHEMA(CpuUtilizationReport)
295  .NumInputs(1)
296  .NumOutputs(0)
297  .SetDoc(R"DOC(Report the delta in max CPU utilization observed so far in the
298  plan)DOC")
299  .Input(
300  0,
301  "utilization",
302  "Delta in max CPU utilization observed, in percentage as a float value")
303  .Arg("stats_name", "String name of the stat entry holding CPU utilization");
304 
// These are the two payload types the operators above pass to
// OperatorBase::Output<T>() / Input<T>().
// NOTE(review): presumably CAFFE_KNOWN_TYPE registers them with Caffe2's
// type system so blobs can hold them -- confirm against the macro definition.
305 CAFFE_KNOWN_TYPE(TimerInstance*);
306 CAFFE_KNOWN_TYPE(std::unique_ptr<caffe2::StatRegistry>);
307 } // namespace caffe2
Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
Definition: workspace.h:63
Copyright (c) 2016-present, Facebook, Inc.
std::vector< ExportedStatValue > ExportedStatList
Holds names and values of counters exported from a StatRegistry.
Definition: stats.h:56
static StatRegistry & get()
Retrieve the singleton StatRegistry, which gets populated through the CAFFE_EVENT macro...
Definition: stats.cc:65
Holds a map of atomic counters keyed by name.
Definition: stats.h:133