Caffe2 - C++ API
A deep learning, cross-platform ML framework
operator.cc
#include "caffe2/core/operator.h"

#include <algorithm>

#include "caffe2/core/logging.h"
#include "caffe2/core/net.h"
#include "caffe2/core/operator_gradient.h"
#include "caffe2/core/tensor.h"
#include "caffe2/core/types.h"
#include "caffe2/core/workspace.h"

#include "caffe2/proto/caffe2.pb.h"
#include "caffe2/utils/proto_utils.h"
#include "caffe2/utils/string_utils.h"

CAFFE2_DEFINE_int(
    caffe2_operator_max_engine_name_length,
    10,
    "Maximum engine name length to be stored");
CAFFE2_DEFINE_bool(
    caffe2_disable_implicit_engine_preference,
    false,
    "If set, disable implicit engine preferences. This is useful for unit "
    "testing and debugging cases.");

namespace caffe2 {

OperatorBase::OperatorBase(const OperatorDef& operator_def, Workspace* ws)
    : operator_ws_(ws),
      operator_def_(std::make_shared<OperatorDef>(operator_def)),
      device_option_(
          operator_def.has_device_option() ? operator_def.device_option()
                                           : DeviceOption()),
      event_(caffe2::make_unique<Event>(device_option_)) {
  for (const string& input_str : operator_def.input()) {
    auto* blob = ws->GetBlob(input_str);
    CAFFE_ENFORCE(
        blob != nullptr,
        "op ",
        operator_def.type(),
        ": Encountered a non-existing input blob: ",
        input_str);
    inputs_.push_back(blob);
  }

  GetOperatorLogger()(operator_def);

  for (const string& output_str : operator_def.output()) {
    outputs_.push_back(CHECK_NOTNULL(ws->CreateBlob(output_str)));
  }
}

vector<TensorShape> OperatorBase::InputTensorShapes() {
  vector<TensorShape> tps;
  for (const auto& blob : inputs_) {
    tps.push_back(GetTensorShapeOfBlob(blob));
  }
  return tps;
}

namespace {

PerOpEnginePrefType& g_per_op_engine_pref() {
  static auto* g_per_op_engine_pref_ = new PerOpEnginePrefType();
  return *g_per_op_engine_pref_;
}

GlobalEnginePrefType& g_global_engine_pref() {
  static auto* g_global_engine_pref_ =
      new GlobalEnginePrefType{{DeviceType::CUDA, {"CUDNN"}}};
  return *g_global_engine_pref_;
}

unique_ptr<OperatorBase> TryCreateOperator(
    const string& key, const OperatorDef& operator_def, Workspace* ws) {
  auto type = operator_def.device_option().device_type();
  CAFFE_ENFORCE(
      gDeviceTypeRegistry()->count(type),
      "Device type ",
      type,
      " not registered.");
  OperatorRegistry* registry = gDeviceTypeRegistry()->at(type);
  VLOG(1) << "Creating operator with device type " << type;
  try {
    return registry->Create(key, operator_def, ws);
  } catch (const UnsupportedOperatorFeature& err) {
    LOG(WARNING) << "Operator " << operator_def.type()
                 << " does not support the requested feature. Msg: "
                 << err.what()
                 << ". Proto is: " << ProtoDebugString(operator_def);
    return nullptr;
  }
}

unique_ptr<OperatorBase> _CreateOperator(
    const OperatorDef& operator_def,
    Workspace* ws) {
  static StaticLinkingProtector g_protector;
  const auto op_type = operator_def.type();
  const auto device_type = operator_def.device_option().device_type();

#ifndef CAFFE2_NO_OPERATOR_SCHEMA
  // First, check with the OpSchema registry whether the operator is legal.
  auto* schema = OpSchemaRegistry::Schema(op_type);
  if (schema) {
    CAFFE_ENFORCE(
        schema->Verify(operator_def),
        "Operator def did not pass schema checking: ",
        ProtoDebugString(operator_def));
  } else {
    // We recommend that every op register its schema, so if there is none we
    // log an error. We still allow the operator to be constructed.
    LOG(ERROR) << "Cannot find operator schema for " << op_type
               << ". Will skip schema checking.";
  }
#endif

  // Second, try the engines specified in the operator_def, followed by the
  // preferred engines.
  std::vector<std::string> engines{};
  if (operator_def.engine().size()) {
    const auto op_def_engines = split(',', operator_def.engine());
    engines.insert(engines.end(), op_def_engines.begin(), op_def_engines.end());
  }
  if (!FLAGS_caffe2_disable_implicit_engine_preference &&
      g_per_op_engine_pref().count(device_type) &&
      g_per_op_engine_pref()[device_type].count(op_type)) {
    const auto& preferred_engines =
        g_per_op_engine_pref()[device_type][op_type];
    VLOG(2) << "Inserting per-op engine preference: " << preferred_engines;
    engines.insert(
        engines.end(), preferred_engines.begin(), preferred_engines.end());
  }
  if (!FLAGS_caffe2_disable_implicit_engine_preference &&
      g_global_engine_pref().count(device_type)) {
    const auto& preferred_engines = g_global_engine_pref()[device_type];
    VLOG(2) << "Inserting global engine preference: " << preferred_engines;
    engines.insert(
        engines.end(), preferred_engines.begin(), preferred_engines.end());
  }
  for (const auto& engine : engines) {
    const std::string key = OpRegistryKey(op_type, engine);
    VLOG(1) << "Trying to create operator " << op_type << " with engine "
            << engine;
    auto op = TryCreateOperator(key, operator_def, ws);
    if (op) {
      if (engine.size() <= FLAGS_caffe2_operator_max_engine_name_length) {
        op->annotate_engine(engine);
      } else {
        op->annotate_engine(
            engine.substr(0, FLAGS_caffe2_operator_max_engine_name_length));
      }
      return op;
    } else {
      // If creation with this engine fails, try the next engine, eventually
      // falling back to the default implementation below.
      VLOG(1) << "Operator with engine " << engine << " is not available.";
    }
  }
  VLOG(1) << "Using default implementation.";

  // Lastly, if none of the engines worked, try the default engine.
  auto op = TryCreateOperator(op_type, operator_def, ws);
  CAFFE_ENFORCE(
      op,
      "Cannot create operator of type '",
      op_type,
      "' on the device '",
      DeviceTypeName(device_type),
      "'. Verify that an implementation for the corresponding device exists. "
      "This might also happen if the binary is not linked with the operator "
      "implementation code. If the Python frontend is used, it might happen "
      "if the dyndep.InitOpsLibrary call is missing. Operator def: ",
      ProtoDebugString(operator_def));
  return op;
}

} // namespace

const std::string OpRegistryKey(
    const std::string& op_type,
    const std::string& engine) {
  if (engine == "" || engine == "DEFAULT") {
    return op_type;
  } else {
    return op_type + "_ENGINE_" + engine;
  }
}
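
// For illustration (not part of the original file): OpRegistryKey appends the
// engine name to the op type, so engine-specific registrations are looked up
// under a combined key. The "Conv" op type is an assumption for illustration.
//
//   OpRegistryKey("Conv", "CUDNN");   // -> "Conv_ENGINE_CUDNN"
//   OpRegistryKey("Conv", "");        // -> "Conv"
//   OpRegistryKey("Conv", "DEFAULT"); // -> "Conv"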

void SetPerOpEnginePref(const PerOpEnginePrefType& per_op_engine_pref) {
  for (const auto& device_pref_pair : per_op_engine_pref) {
    const auto& device_type = device_pref_pair.first;
    CAFFE_ENFORCE(
        gDeviceTypeRegistry()->count(device_type),
        "Device type ",
        device_type,
        " not registered.");
    auto* registry = gDeviceTypeRegistry()->at(device_type);

    for (const auto& op_pref_pair : device_pref_pair.second) {
      const auto& op_type = op_pref_pair.first;
      CAFFE_ENFORCE(
          registry->Has(op_type),
          "Operator type ",
          op_type,
          " not registered in ",
          device_type,
          " registry.");
    }
  }
  g_per_op_engine_pref() = per_op_engine_pref;
}

void SetGlobalEnginePref(const GlobalEnginePrefType& global_engine_pref) {
  for (const auto& device_pref_pair : global_engine_pref) {
    const auto& device_type = device_pref_pair.first;
    CAFFE_ENFORCE(
        gDeviceTypeRegistry()->count(device_type),
        "Device type ",
        device_type,
        " not registered.");
  }
  g_global_engine_pref() = global_engine_pref;
}

void SetEnginePref(
    const PerOpEnginePrefType& per_op_engine_pref,
    const GlobalEnginePrefType& global_engine_pref) {
  SetPerOpEnginePref(per_op_engine_pref);
  SetGlobalEnginePref(global_engine_pref);
}

void SetOpEnginePref(
    const std::string& op_type,
    const CaffeMap<int, EnginePrefType>& op_pref) {
  for (const auto& device_pref_pair : op_pref) {
    const auto& device_type = device_pref_pair.first;
    CAFFE_ENFORCE(
        gDeviceTypeRegistry()->count(device_type),
        "Device type ",
        device_type,
        " not registered.");
    CAFFE_ENFORCE(
        gDeviceTypeRegistry()->at(device_type)->Has(op_type),
        "Operator type ",
        op_type,
        " not registered in ",
        device_type,
        " registry.");
    g_per_op_engine_pref()[device_type][op_type] = device_pref_pair.second;
  }
}
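
// Usage sketch (illustrative, not part of the original file): engine
// preferences can be configured at runtime, before operators are created.
// The "Conv" op type below is an assumption for illustration.
//
//   // Prefer CUDNN for CUDA "Conv" ops only:
//   SetOpEnginePref("Conv", {{DeviceType::CUDA, {"CUDNN"}}});
//   // Prefer CUDNN for all ops on CUDA devices:
//   SetGlobalEnginePref({{DeviceType::CUDA, {"CUDNN"}}});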

unique_ptr<OperatorBase> CreateOperator(
    const OperatorDef& operator_def,
    Workspace* ws,
    int net_position) {
  try {
    auto op = _CreateOperator(operator_def, ws);
    op->set_net_position(net_position);
    return op;
  } catch (...) {
    if (net_position != 0) {
      VLOG(1) << "Operator constructor with net position " << net_position
              << " failed";
      ws->last_failed_op_net_position = net_position;
    } else {
      VLOG(1) << "Failed operator constructor doesn't have an id set";
    }
    throw;
  }
}
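
// Usage sketch (illustrative, not part of the original file): building an
// operator from a definition. The op type and blob names are assumptions,
// and the input blob would still need a tensor written into it before Run().
//
//   Workspace ws;
//   ws.CreateBlob("X");
//   OperatorDef def;
//   def.set_type("Relu");
//   def.add_input("X");
//   def.add_output("Y");
//   auto op = CreateOperator(def, &ws, /*net_position=*/0);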

std::map<int32_t, OperatorRegistry*>* gDeviceTypeRegistry() {
  static std::map<int32_t, OperatorRegistry*> g_device_type_registry;
  return &g_device_type_registry;
}

CAFFE_DEFINE_REGISTRY(
    CPUOperatorRegistry,
    OperatorBase,
    const OperatorDef&,
    Workspace*);
CAFFE_REGISTER_DEVICE_TYPE(DeviceType::CPU, CPUOperatorRegistry);

CAFFE_DEFINE_REGISTRY(
    CUDAOperatorRegistry,
    OperatorBase,
    const OperatorDef&,
    Workspace*);
CAFFE_REGISTER_DEVICE_TYPE(DeviceType::CUDA, CUDAOperatorRegistry);
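
// Note (illustrative, not part of the original file): concrete operators are
// added to these registries via the registration macros declared in
// operator.h, typically in the operator's own .cc/.cu file, e.g.:
//
//   REGISTER_CPU_OPERATOR(Relu, ReluOp<float, CPUContext>);
//   REGISTER_CUDA_OPERATOR(Relu, ReluOp<float, CUDAContext>);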

CAFFE_DEFINE_REGISTRY(
    GradientRegistry,
    GradientMakerBase,
    const OperatorDef&,
    const vector<GradientWrapper>&);

GradientOpsMeta GetGradientForOp(
    const OperatorDef& def, const vector<GradientWrapper>& g_output) {
  std::unique_ptr<GradientMakerBase> maker(
      GradientRegistry()->Create(def.type(), def, g_output));
  CAFFE_ENFORCE(
      maker, "Gradient maker for operator ", def.type(), " not implemented.");
  GradientOpsMeta meta = maker->Get();
  // Copy the device option if needed.
  if (maker->CopyDeviceOption() && def.has_device_option()) {
    for (OperatorDef& grad_def : meta.ops_) {
      grad_def.mutable_device_option()->CopyFrom(def.device_option());
    }
  }
  // Copy the engine if needed.
  if (maker->CopyEngine() && def.has_engine()) {
    for (OperatorDef& grad_def : meta.ops_) {
      grad_def.set_engine(def.engine());
    }
  }
  // Copy the arguments if needed.
  if (maker->CopyArguments() && def.arg_size()) {
    for (OperatorDef& grad_def : meta.ops_) {
      for (auto& arg : def.arg()) {
        grad_def.add_arg()->CopyFrom(arg);
      }
    }
  }
  // VLOG for debugging purposes.
  for (const OperatorDef& grad_def : meta.ops_) {
    VLOG(1) << "Gradient ops: " << ProtoDebugString(grad_def);
  }
  // Check that the gradient computation has returned one gradient entry per
  // operator input.
  CAFFE_ENFORCE_EQ(meta.g_input_.size(), def.input_size());
  VLOG(1) << "Gradients:";
  for (const GradientWrapper& grad : meta.g_input_) {
    // The gradient should either be (1) not set, (2) dense, or (3) sparse,
    // but it cannot be both dense and sparse.
    if (!grad.IsDense() && !grad.IsSparse()) {
      VLOG(1) << "\t [no gradient]";
    } else if (grad.IsDense()) {
      VLOG(1) << "\t [dense]" << grad.dense_;
    } else {
      CAFFE_ENFORCE(
          grad.indices_.size() && grad.values_.size(),
          "For a sparse gradient, one should set both indices and values. "
          "Currently we have: (" +
              grad.indices_ + ", " + grad.values_ + ").");
      VLOG(1) << "\t [sparse] " << grad.indices_ << ", " << grad.values_;
    }
  }
  return meta;
}
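
// Usage sketch (illustrative, not part of the original file): asking for the
// gradient of an existing forward op `def`. The blob name is an assumption.
//
//   vector<GradientWrapper> g_output(1);
//   g_output[0].dense_ = "Y_grad";
//   GradientOpsMeta meta = GetGradientForOp(def, g_output);
//   // meta.ops_ holds the gradient operator defs; meta.g_input_ describes
//   // the gradient produced for each input of `def`.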

static TensorShapes InferBlobShapesAndTypes(
    CaffeMap<string, TensorShape>& blob_desc,
    const vector<std::unique_ptr<NetDef>>& nets) {
  for (auto& defptr : nets) {
    // Hack to work with automatically split gradients.
    CaffeMap<string, string> unmatched_sum_blobs;
    CaffeMap<string, TensorShape> reshape_cache;

    for (const OperatorDef& op : defptr.get()->op()) {
      // Hack to ignore queues.
      if (op.type().find("Dequeue") != std::string::npos ||
          op.type().find("Enqueue") != std::string::npos) {
        continue;
      }

      vector<TensorShape> input_desc;
      bool found_all = true;
      for (const string& in : op.input()) {
        auto inp_desc = blob_desc.find(in);
        if (inp_desc == blob_desc.end()) {
          LOG(WARNING) << "Shape and type inference failed for input: " << in
                       << " for op " << op.type() << ", skipping.";
          found_all = false;
          break;
        }
        input_desc.push_back(inp_desc->second);
      }
      if (!found_all) {
        continue;
      }
      auto op_schema = OpSchemaRegistry::Schema(op.type());
      if (op_schema == nullptr) {
        LOG(WARNING) << "Shape inference failed, no schema for: " << op.type();
        continue;
      }

      // Special handling for Sum, as it is used with the autosplits, which
      // have a different naming convention. Assuming that all Sum inputs
      // must be of the same size, we can infer their shapes.
      if (op.type() == "Sum") {
        TensorShape sum_shape;
        for (auto inp : op.input()) {
          auto it = blob_desc.find(inp);
          if (it != blob_desc.end() && !it->second.unknown_shape()) {
            if (it->second.dims_size() > 0) {
              sum_shape = blob_desc[inp];
              break;
            }
          }
        }
        for (auto inp : op.input()) {
          auto it = blob_desc.find(inp);
          if (it == blob_desc.end() || it->second.unknown_shape()) {
            blob_desc[inp] = sum_shape;
            if (sum_shape.dims_size() == 0) {
              // Match later with the output.
              unmatched_sum_blobs[inp] = op.output(0);
            }
          }
        }
      }

      if (op.type() == "Reshape" && op.is_gradient_op()) {
        CAFFE_ENFORCE(reshape_cache.find(op.input(1)) != reshape_cache.end());
        TensorShape cached = reshape_cache[op.input(1)];
        blob_desc[op.output(0)] = cached;
        continue;
      }

      std::vector<TensorShape> out;
      try {
        out = op_schema->InferTensor(op, input_desc);
        if (op.is_gradient_op() && out.size()) {
          // Special handling for gradient ops. We can assume gradients are
          // of the same size as the corresponding variables. It is a bit
          // ugly to base this on string matching, but we don't have the
          // connection between a variable and its gradient specified.
          CaffeMap<string, string> grads_to_params =
              GradientMakerBase::MatchGradsToParams(op);

          for (int i = 0; i < out.size(); i++) {
            if (out[i].unknown_shape()) {
              std::string gradout = op.output(i);

              if (grads_to_params.find(gradout) != grads_to_params.end()) {
                std::string var = grads_to_params[gradout];
                if (blob_desc.find(var) != blob_desc.end()) {
                  out[i] = blob_desc[var];
                }
              }
            }
          }
        }

        if (op.type() == "Reshape") {
          // Reshape stores the original input shape in its second output
          // blob. We need this for gradient reshape.
          reshape_cache[op.output(1)] = input_desc[0];
        }

      } catch (::caffe2::EnforceNotMet& enf) {
        LOG(ERROR) << "Shape inference error: " << enf.msg();
        LOG(ERROR) << "Operator: " << ProtoDebugString(op) << std::endl;
        LOG(ERROR) << "Returning empty results.";

        TensorShapes tps;
        return tps;
      }

      if (out.size() != op.output_size()) {
        if (op.type() == "Slice") {
          CAFFE_ENFORCE(
              out.size() == 0,
              "For the Slice operator, either the shapes of all output blobs "
              "are inferred, or none of them can be inferred.");
        } else {
          CAFFE_THROW(
              "Invalid shape inference for operator ",
              op.type(),
              ". Expected ",
              op.output_size(),
              " outputs, but got ",
              out.size());
        }
      } else {
        for (int i = 0; i < out.size(); i++) {
          blob_desc[op.output(i)] = out[i];
        }
      }
    } // net.ops

    for (auto& unmatched : unmatched_sum_blobs) {
      if (blob_desc.find(unmatched.second) != blob_desc.end()) {
        blob_desc[unmatched.first] = blob_desc[unmatched.second];
      }
    }

  } // nets
  TensorShapes tps;
  for (auto kv : blob_desc) {
    TensorShape& tp = kv.second;
    TensorShape* tpnew = tps.add_shapes();
    tpnew->CopyFrom(tp);
    tpnew->set_name(kv.first);
  }
  return tps;
}

TensorShape GetTensorShapeOfBlob(const Blob* b) {
  TypeCall type_fun = GetTypeCallFunction(b->meta().id());
  TensorInfoCall tensor_info_fun = GetTensorInfoFunction(b->meta().id());
  TensorShape tp;

  if (type_fun) {
    tp.set_data_type(TypeMetaToDataType(type_fun(b->GetRaw())));
  }
  if (tensor_info_fun) {
    bool _shares_data;
    size_t _capacity;
    DeviceOption _device;
    auto shape =
        tensor_info_fun(b->GetRaw(), &_shares_data, &_capacity, &_device);
    for (auto d : shape) {
      tp.add_dims(d);
    }
  } else {
    tp.set_unknown_shape(true);
  }
  return tp;
}

TensorShapes InferBlobShapesAndTypesFromWorkspace(
    Workspace* ws,
    const vector<std::unique_ptr<NetDef>>& nets) {
  CaffeMap<string, TensorShape> blob_desc;
  // Populate shapes from the workspace.
  const std::vector<string>& ws_blobs = ws->Blobs();
  for (const auto& s : ws_blobs) {
    Blob* b = ws->GetBlob(s);
    TensorShape tp = GetTensorShapeOfBlob(b);
    blob_desc[s] = tp;
  }
  return InferBlobShapesAndTypes(blob_desc, nets);
}

TensorShapes InferBlobShapesAndTypesFromMap(
    const CaffeMap<std::string, std::vector<TIndex>>& blob_dimensions,
    const vector<std::unique_ptr<NetDef>>& nets) {
  CaffeMap<string, TensorShape> blob_desc;
  // Populate shapes from known blobs.
  for (const auto& blob : blob_dimensions) {
    TensorShape tp;
    for (auto d : blob.second) {
      CAFFE_ENFORCE_GT(d, 0);
      tp.add_dims(d);
    }
    blob_desc[blob.first] = tp;
  }
  return InferBlobShapesAndTypes(blob_desc, nets);
}
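
// Usage sketch (illustrative, not part of the original file): seeding shape
// inference with externally known input dimensions, where `nets` is a
// vector<std::unique_ptr<NetDef>>. The blob name and sizes are assumptions.
//
//   CaffeMap<std::string, std::vector<TIndex>> dims;
//   dims["data"] = {64, 3, 32, 32};
//   TensorShapes shapes = InferBlobShapesAndTypesFromMap(dims, nets);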

std::map<string, std::pair<DeviceOption, DeviceOption>> ValidateTensorDevices(
    OperatorBase& op,
    const OperatorDef& op_def) {
  std::map<string, std::pair<DeviceOption, DeviceOption>> mismatches;
  DeviceOption op_device = op_def.device_option();

#ifndef CAFFE2_NO_OPERATOR_SCHEMA
  // Check the op schema to see whether this op is meant to cross devices.
  auto op_schema = OpSchemaRegistry::Schema(op_def.type());
  if (op_schema != nullptr) {
    if (op_schema->inputs_can_cross_devices()) {
      return mismatches;
    }
  }
#endif // CAFFE2_NO_OPERATOR_SCHEMA

  auto Check = [&](const Blob& blob, std::string blob_name) {
    TensorInfoCall tensor_info_fun = GetTensorInfoFunction(blob.meta().id());
    if (tensor_info_fun) {
      bool _shares_data;
      size_t _capacity;
      DeviceOption blob_device;
      tensor_info_fun(
          const_cast<Blob&>(blob).GetRaw(),
          &_shares_data,
          &_capacity,
          &blob_device);

      if (blob_device.device_type() == CUDA &&
          blob_device.cuda_gpu_id() != op_device.cuda_gpu_id()) {
        mismatches[blob_name] = std::make_pair(op_device, blob_device);
      }
    }
  };

  // Check that the inputs and outputs live on the same device as the op.
  for (int i = 0; i < op.InputSize(); i++) {
    Check(op.InputBlob(i), op_def.input(i));
  }
  for (int i = 0; i < op.OutputSize(); i++) {
    Check(*op.OutputBlob(i), op_def.output(i));
  }
  return mismatches;
}
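
// Usage sketch (illustrative, not part of the original file): surfacing
// cross-device blob placement problems for an operator `op` created by
// CreateOperator from the def `def`.
//
//   auto mismatches = ValidateTensorDevices(*op, def);
//   for (const auto& m : mismatches) {
//     LOG(WARNING) << "Blob " << m.first
//                  << " is on a different CUDA device than its operator.";
//   }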

} // namespace caffe2