Caffe2 - C++ API
A deep learning, cross-platform ML framework
stats_ops.cc
1 #include <chrono>
2 #include <vector>
3 #include "caffe2/core/operator.h"
4 #include "caffe2/core/stats.h"
5 #include "caffe2/core/tensor.h"
6 
7 namespace caffe2 {
8 
9 class StatRegistryCreateOp : public Operator<CPUContext> {
10  public:
11  template <class... Args>
12  explicit StatRegistryCreateOp(Args&&... args)
13  : Operator(std::forward<Args>(args)...) {}
14 
15  bool RunOnDevice() override {
16  *OperatorBase::Output<std::unique_ptr<StatRegistry>>(0) =
17  std::unique_ptr<StatRegistry>(new StatRegistry);
18  return true;
19  }
20 };
21 
22 class StatRegistryExportOp : public Operator<CPUContext> {
23  public:
24  template <class... Args>
25  explicit StatRegistryExportOp(Args&&... args)
26  : Operator(std::forward<Args>(args)...),
27  reset_(GetSingleArgument<bool>("reset", true)) {}
28 
29  bool RunOnDevice() override {
30  auto registry = InputSize() > 0
31  ? OperatorBase::Input<std::unique_ptr<StatRegistry>>(0).get()
32  : &StatRegistry::get();
33  auto* keys = Output(0);
34  auto* values = Output(1);
35  auto* timestamps = Output(2);
36  auto data = registry->publish(reset_);
37  keys->Resize(data.size());
38  values->Resize(data.size());
39  timestamps->Resize(data.size());
40  auto* pkeys = keys->template mutable_data<std::string>();
41  auto* pvals = values->template mutable_data<int64_t>();
42  auto* ptimestamps = timestamps->template mutable_data<int64_t>();
43  int i = 0;
44  for (const auto& stat : data) {
45  pkeys[i] = std::move(stat.key);
46  pvals[i] = stat.value;
47  ptimestamps[i] =
48  std::chrono::nanoseconds(stat.ts.time_since_epoch()).count();
49  ++i;
50  }
51  return true;
52  }
53 
54  private:
55  bool reset_;
56 };
57 
58 class StatRegistryUpdateOp : public Operator<CPUContext> {
59  public:
60  template <class... Args>
61  explicit StatRegistryUpdateOp(Args&&... args)
62  : Operator(std::forward<Args>(args)...) {}
63 
64  bool RunOnDevice() override {
65  const auto& keys = Input(0);
66  const auto& values = Input(1);
67  auto registry = InputSize() == 3
68  ? OperatorBase::Input<std::unique_ptr<StatRegistry>>(2).get()
69  : &StatRegistry::get();
70  CAFFE_ENFORCE_EQ(keys.numel(), values.numel());
71  ExportedStatList data(keys.numel());
72  auto* pkeys = keys.data<std::string>();
73  auto* pvals = values.data<int64_t>();
74  int i = 0;
75  for (auto& stat : data) {
76  stat.key = pkeys[i];
77  stat.value = pvals[i];
78  ++i;
79  }
80  registry->update(data);
81  return true;
82  }
83 };
84 
86  public:
87  explicit TimerInstance(const std::string& name)
88  : running_(false), stat_(name) {}
89 
90  void begin() {
91  CAFFE_ENFORCE(!running_, "Called TimerBegin on an already running timer.");
92  running_ = true;
93  start_ = std::chrono::high_resolution_clock::now();
94  }
95 
96  void end() {
97  CAFFE_ENFORCE(running_, "Called TimerEnd on a stopped timer.");
98  using namespace std::chrono;
99  auto duration = high_resolution_clock::now() - start_;
100  auto nanos = duration_cast<nanoseconds>(duration).count();
101  CAFFE_EVENT(stat_, time_ns, nanos);
102  running_ = false;
103  }
104 
105  int64_t get_ns() {
106  CAFFE_ENFORCE(running_, "Called TimerGet on a stopped timer.");
107  using namespace std::chrono;
108  auto duration = high_resolution_clock::now() - start_;
109  auto nanos = duration_cast<nanoseconds>(duration).count();
110  return nanos;
111  }
112 
113  private:
114  bool running_;
115  std::chrono::high_resolution_clock::time_point start_;
116 
117  struct TimerStat {
118  CAFFE_STAT_CTOR(TimerStat);
119  CAFFE_AVG_EXPORTED_STAT(time_ns);
120  } stat_;
121 };
122 
123 struct TimerBeginOp : public Operator<CPUContext> {
124  explicit TimerBeginOp(const OperatorDef& operator_def, Workspace* ws)
125  : Operator(operator_def, ws),
126  given_name_(GetSingleArgument<std::string>(
127  "counter_name",
128  operator_def.output().Get(0))),
129  timer_([this]() { return given_name_; }()) {}
130 
131  bool RunOnDevice() override {
132  *OperatorBase::Output<TimerInstance*>(0) = &timer_;
133  timer_.begin();
134  return true;
135  }
136 
137  private:
138  const std::string given_name_;
139  TimerInstance timer_;
140 };
141 
142 struct TimerEndOp : public Operator<CPUContext> {
143  template <class... Args>
144  explicit TimerEndOp(Args&&... args) : Operator(std::forward<Args>(args)...) {}
145 
146  bool RunOnDevice() override {
147  OperatorBase::Input<TimerInstance*>(0)->end();
148  return true;
149  }
150 };
151 
152 struct TimerGetAndEndOp : public Operator<CPUContext> {
153  template <class... Args>
154  explicit TimerGetAndEndOp(Args&&... args)
155  : Operator(std::forward<Args>(args)...) {}
156 
157  bool RunOnDevice() override {
158  int64_t nanos = OperatorBase::Input<TimerInstance*>(0)->get_ns();
159  OperatorBase::Input<TimerInstance*>(0)->end();
160  auto* res = Output(0);
161  res->Resize(1);
162  res->template mutable_data<int64_t>()[0] = nanos;
163  return true;
164  }
165 };
166 
167 struct TimerGetOp : public Operator<CPUContext> {
168  template <class... Args>
169  explicit TimerGetOp(Args&&... args) : Operator(std::forward<Args>(args)...) {}
170 
171  bool RunOnDevice() override {
172  int64_t nanos = OperatorBase::Input<TimerInstance*>(0)->get_ns();
173  auto* res = Output(0);
174  res->Resize();
175  res->template mutable_data<int64_t>()[0] = nanos;
176  return true;
177  }
178 };
179 
180 REGISTER_CPU_OPERATOR(StatRegistryCreate, StatRegistryCreateOp);
181 REGISTER_CPU_OPERATOR(StatRegistryUpdate, StatRegistryUpdateOp);
182 REGISTER_CPU_OPERATOR(StatRegistryExport, StatRegistryExportOp);
183 
184 REGISTER_CPU_OPERATOR(TimerBegin, TimerBeginOp);
185 REGISTER_CPU_OPERATOR(TimerEnd, TimerEndOp);
186 REGISTER_CPU_OPERATOR(TimerGetAndEnd, TimerGetAndEndOp);
187 REGISTER_CPU_OPERATOR(TimerGet, TimerGetOp);
188 
189 OPERATOR_SCHEMA(StatRegistryCreate)
190  .NumInputs(0)
191  .NumOutputs(1)
192  .SetDoc(R"DOC(
193 Create a StatRegistry object that will contain a map of performance counters
194 keyed by name. A StatRegistry is used to gather and retrieve performance
195 counts throughout the caffe2 codebase.
196 )DOC")
197  .Output(0, "handle", "A Blob pointing to the newly created StatRegistry.");
198 
199 OPERATOR_SCHEMA(StatRegistryUpdate)
200  .NumInputs(2, 3)
201  .NumOutputs(0)
202  .SetDoc(R"DOC(
203 Update the given StatRegistry, or the global StatRegistry,
204 with the values of counters for the given keys.
205 )DOC")
206  .Input(0, "keys", "1D string tensor with the key names to update.")
207  .Input(1, "values", "1D int64 tensor with the values to update.")
208  .Input(
209  2,
210  "handle",
211  "If provided, update the given StatRegistry. "
212  "Otherwise, update the global singleton.");
213 
214 OPERATOR_SCHEMA(StatRegistryExport)
215  .NumInputs(0, 1)
216  .NumOutputs(3)
217  .Input(
218  0,
219  "handle",
220  "If provided, export values from given StatRegistry."
221  "Otherwise, export values from the global singleton StatRegistry.")
222  .Output(0, "keys", "1D string tensor with exported key names")
223  .Output(1, "values", "1D int64 tensor with exported values")
224  .Output(2, "timestamps", "The unix timestamp at counter retrieval.")
225  .Arg(
226  "reset",
227  "(default true) Whether to atomically reset the counters afterwards.");
228 
229 OPERATOR_SCHEMA(TimerBegin)
230  .NumInputs(0)
231  .NumOutputs(1)
232  .SetDoc(R"DOC(
233 Start a wallclock timer, returning a scalar tensor containing a pointer to it. The timer is stopped by calling **TimerEnd**.
234 
235 Github Links:
236 - https://github.com/pytorch/pytorch/blob/master/caffe2/operators/stats_ops.cc
237 
238  )DOC")
239  .Arg("counter_name", "(*str*): name of the timer object; if not set use output name")
240  .Output(0, "timer", "(*Tensor`<ptr>`*): pointer to a timer object");
241 
242 OPERATOR_SCHEMA(TimerEnd)
243  .NumInputs(1)
244  .NumOutputs(0)
245  .SetDoc(R"DOC(
246 Stop a timer started with **TimerBegin**. Publishes a CAFFE_EVENT.
247 
248 Github Links:
249 - https://github.com/pytorch/pytorch/blob/master/caffe2/operators/stats_ops.cc
250 
251  )DOC")
252  .Input(0, "timer", "(*Tensor`<ptr>`*): pointer to a timer object; obtained from **TimerBegin** op");
253 
254 OPERATOR_SCHEMA(TimerGetAndEnd)
255  .NumInputs(1)
256  .NumOutputs(1)
257  .SetDoc(R"DOC(
258 Queries the current time of a timer in nanos, stops the timer publishing a CAFFE_EVENT.
259 
260 Github Links:
261 - https://github.com/pytorch/pytorch/blob/master/caffe2/operators/stats_ops.cc
262 
263 <details>
264 
265 <summary> <b>Example</b> </summary>
266 
267 **Code**
268 
269 ```
270 
271 workspace.ResetWorkspace()
272 
273 timerbegin_op = core.CreateOperator(
274  "TimerBegin",
275  [],
276  ["timer"]
277 )
278 
279 timerget_op = core.CreateOperator(
280  "TimerGet",
281  ["timer"],
282  ["nanos"]
283 )
284 
285 timerend_op = core.CreateOperator(
286  "TimerEnd",
287  ["timer"],
288  []
289 )
290 
291 timergetandend_op = core.CreateOperator(
292  "TimerGetAndEnd",
293  ["timer"],
294  ["nanos"]
295 )
296 
297 // Test TimerBegin/TimerGet/TimerEnd
298 workspace.RunOperatorOnce(timerbegin_op)
299 print("timer:", workspace.FetchBlob("timer"))
300 workspace.RunOperatorOnce(timerget_op)
301 print("nanos:", workspace.FetchBlob("nanos"))
302 workspace.RunOperatorOnce(timerend_op)
303 
304 
305 // Test TimerBegin/TimerGetAndEnd
306 workspace.RunOperatorOnce(timerbegin_op)
307 print("timer:", workspace.FetchBlob("timer"))
308 workspace.RunOperatorOnce(timergetandend_op)
309 print("nanos:", workspace.FetchBlob("nanos"))
310 
311 ```
312 
313 **Result**
314 
315 ```
316 
317 timer: b'timer, a C++ native class of type caffe2::TimerInstance*.'
318 nanos: 361140
319 timer: b'timer, a C++ native class of type caffe2::TimerInstance*.'
320 nanos: [252250]
321 
322 ```
323 
324 </details>
325 
326  )DOC")
327  .Input(0, "timer", "(*Tensor`<ptr>`*): pointer to a timer object; obtained from **TimerBegin** op")
328  .Output(0, "nanos", "(*Tensor`<int64>`*): scalar tensor containing time in nanoseconds");
329 
330 OPERATOR_SCHEMA(TimerGet)
331  .NumInputs(1)
332  .NumOutputs(1)
333  .SetDoc(R"DOC(
334 Queries the current time of a timer object in nanoseconds.
335 
336 Github Links:
337 - https://github.com/pytorch/pytorch/blob/master/caffe2/operators/stats_ops.cc
338 
339  )DOC")
340  .Input(0, "timer", "(*Tensor`<ptr>`*): pointer to a timer object; obtained from **TimerBegin** op")
341  .Output(0, "nanos", "(*Tensor`<int64>`*): scalar containing time in nanoseconds");
342 
343 CAFFE_KNOWN_TYPE(TimerInstance*);
344 CAFFE_KNOWN_TYPE(std::unique_ptr<caffe2::StatRegistry>);
345 } // namespace caffe2
Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
Definition: workspace.h:47
const Tensor & Input(int idx, DeviceType type=CPUContext::GetDeviceType())
Retrieve a non-owning reference to the input at position 'idx' for this operator. ...
Definition: operator.h:702
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Definition: blob.h:13
std::vector< ExportedStatValue > ExportedStatList
Holds names and values of counters exported from a StatRegistry.
Definition: stats.h:40
static StatRegistry & get()
Retrieve the singleton StatRegistry, which gets populated through the CAFFE_EVENT macro...
Definition: stats.cc:49
Holds a map of atomic counters keyed by name.
Definition: stats.h:117