#include "caffe2/core/operator.h"

#include "caffe2/core/init.h"
#include "caffe2/core/logging.h"
#include "caffe2/core/net.h"
#include "caffe2/core/operator_gradient.h"
#include "caffe2/core/tensor.h"
#include "caffe2/core/types.h"
#include "caffe2/core/workspace.h"

#include "caffe2/proto/caffe2_pb.h"
#include "caffe2/utils/proto_utils.h"
#include "caffe2/utils/string_utils.h"

#include "caffe2/core/operator_c10wrapper.h"

C10_DEFINE_int(
    caffe2_operator_max_engine_name_length,
    10,
    "Maximum engine name length to be stored");
C10_DEFINE_bool(
    caffe2_disable_implicit_engine_preference,
    false,
    "If set, disable implicit engine preferences. This is useful for unit "
    "testing and debugging cases.");
C10_DEFINE_bool(
    caffe2_operator_throw_if_fp_exceptions,
    false,
    "If set, throws if floating point exceptions (FE_DIVBYZERO, FE_INVALID, "
    "FE_OVERFLOW) are detected when running any operator.");

namespace caffe2 {
OperatorBase::OperatorBase(const OperatorDef& operator_def, Workspace* ws)
    : operator_ws_(ws),
      operator_def_(std::make_shared<OperatorDef>(operator_def)),
      device_option_(
          operator_def.has_device_option() ? operator_def.device_option()
                                           : DeviceOption()),
      input_size_(operator_def.input_size()),
      event_(caffe2::make_unique<Event>(device_option_)) {
  static GlobalInitIsCalledGuard guard;
  for (const string& input_str : operator_def.input()) {
    auto* blob = ws->GetBlob(input_str);
    CAFFE_ENFORCE(
        blob != nullptr,
        "op ",
        operator_def.type(),
        ": Encountered a non-existing input blob: ",
        input_str);
    inputs_.push_back(blob);
  }

  GetOperatorLogger()(operator_def);

  for (const string& output_str : operator_def.output()) {
    outputs_.push_back(CHECK_NOTNULL(ws->CreateBlob(output_str)));
  }

  type_ = operator_def.type();
}
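// Illustrative note (not from the original source): the constructor above
// enforces that every input blob already exists in the workspace, while
// output blobs are created on demand. A minimal sketch:
//
//   Workspace ws;
//   ws.CreateBlob("X"); // inputs must pre-exist
//   OperatorDef def;
//   def.set_type("Relu");
//   def.add_input("X");
//   def.add_output("Y"); // "Y" is created by the constructor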
namespace {

int compute_input_size_(const std::vector<c10::IValue>& inputs) {
  if (inputs.empty()) {
    return 0;
  }
  if (inputs[0].isTensorList()) {
    // If the first input is a tensor list, we get input tensors by indexing
    // into that list. This means that only tensors from that list are
    // accessible as inputs; any tensors that come after the list are not.
    return inputs[0].toTensorListRef().size();
  }
  // Not a tensor list: count the tensor inputs, which must all come before
  // the first non-tensor input.
  size_t num_tensor_inputs = 0;
  bool found_nontensor = false;
  for (const auto& input : inputs) {
    if (input.isTensor()) {
      AT_ASSERTM(
          !found_nontensor,
          "All tensor arguments must come before non-tensor arguments");
      ++num_tensor_inputs;
    } else {
      found_nontensor = true;
    }
  }
  return num_tensor_inputs;
}

} // namespace
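// Worked example for compute_input_size_ (illustrative): inputs
// [Tensor, Tensor, int] yield 2, while [Tensor, int, Tensor] trip the
// assertion, because a tensor appears after a non-tensor argument.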
OperatorBase::OperatorBase(
    c10::FunctionSchema fn_schema,
    std::vector<c10::IValue> inputs,
    std::vector<at::Tensor> outputs)
    : fn_schema_(make_unique<c10::FunctionSchema>(std::move(fn_schema))),
      newstyle_inputs_(std::move(inputs)),
      newstyle_outputs_(std::move(outputs)),
      input_size_(compute_input_size_(newstyle_inputs_)) {
  input_tensors_.resize(input_size_);
  output_tensors_.resize(newstyle_outputs_.size());
}
vector<TensorShape> OperatorBase::InputTensorShapes() const {
  vector<TensorShape> tps;
  for (const auto& blob : inputs_) {
    tps.push_back(GetTensorShapeOfBlob(blob));
  }
  return tps;
}
namespace {

PerOpEnginePrefType& g_per_op_engine_pref() {
  static auto* g_per_op_engine_pref_ = new PerOpEnginePrefType();
  return *g_per_op_engine_pref_;
}

GlobalEnginePrefType& g_global_engine_pref() {
  static auto* g_global_engine_pref_ =
      new GlobalEnginePrefType{{CUDA, {"CUDNN"}}, {HIP, {"MIOPEN"}}};
  return *g_global_engine_pref_;
}
unique_ptr<OperatorBase> TryCreateOperator(
    const string& key,
    const OperatorDef& operator_def,
    Workspace* ws) {
  const auto& type_proto = operator_def.device_option().device_type();
  const auto& type = ProtoToType(static_cast<DeviceTypeProto>(type_proto));
  CAFFE_ENFORCE(
      gDeviceTypeRegistry()->count(type),
      "Device type ",
      type,
      " not registered.");
  OperatorRegistry* registry = gDeviceTypeRegistry()->at(type);
  VLOG(1) << "Creating operator with device type " << type;
  try {
    return registry->Create(key, operator_def, ws);
  } catch (const UnsupportedOperatorFeature& err) {
    LOG(WARNING) << "Operator " << operator_def.type()
                 << " does not support the requested feature. Msg: "
                 << err.what()
                 << ". Proto is: " << ProtoDebugString(operator_def);
    return nullptr;
  }
}
unique_ptr<OperatorBase> _CreateOperator(
    const OperatorDef& operator_def,
    Workspace* ws) {
  static StaticLinkingProtector g_protector;
  const auto& op_type = operator_def.type();
  const auto& device_type_proto = operator_def.device_option().device_type();
  const auto& device_type =
      ProtoToType(static_cast<DeviceTypeProto>(device_type_proto));

#ifndef CAFFE2_NO_OPERATOR_SCHEMA
  // First, check with OpSchema if the operator is legal.
  auto* schema = OpSchemaRegistry::Schema(op_type);
  if (schema) {
    CAFFE_ENFORCE(
        schema->Verify(operator_def),
        "Operator def did not pass schema checking: ",
        ProtoDebugString(operator_def));
  } else {
    // We would like to recommend every operator to register its schema, so if
    // there is none we print a LOG(ERROR), but still allow the operator to be
    // constructed.
    LOG(ERROR) << "Cannot find operator schema for " << op_type
               << ". Will skip schema checking.";
  }
#endif

  // Second, try the engines specified in the operator_def and the implicit
  // engine preferences.
  std::vector<std::string> engines{};
  if (operator_def.engine().size()) {
    const auto op_def_engines = split(',', operator_def.engine());
    engines.insert(engines.end(), op_def_engines.begin(), op_def_engines.end());
  }
  if (!FLAGS_caffe2_disable_implicit_engine_preference &&
      g_per_op_engine_pref().count(device_type) &&
      g_per_op_engine_pref()[device_type].count(op_type)) {
    const auto& preferred_engines =
        g_per_op_engine_pref()[device_type][op_type];
    VLOG(2) << "Inserting per-op engine preference: " << preferred_engines;
    engines.insert(
        engines.end(), preferred_engines.begin(), preferred_engines.end());
  }
  if (!FLAGS_caffe2_disable_implicit_engine_preference &&
      g_global_engine_pref().count(device_type)) {
    const auto& preferred_engines = g_global_engine_pref()[device_type];
    VLOG(2) << "Inserting global engine preference: " << preferred_engines;
    engines.insert(
        engines.end(), preferred_engines.begin(), preferred_engines.end());
  }
  for (const auto& engine : engines) {
    const std::string key = OpRegistryKey(op_type, engine);
    VLOG(1) << "Trying to create operator " << op_type << " with engine "
            << engine;
    auto op = TryCreateOperator(key, operator_def, ws);
    if (op) {
      if (engine.size() <=
          (unsigned)FLAGS_caffe2_operator_max_engine_name_length) {
        op->annotate_engine(engine);
      } else {
        op->annotate_engine(
            engine.substr(0, FLAGS_caffe2_operator_max_engine_name_length));
      }
      return op;
    } else {
      // If the above fails, we will just fall back to the default
      // implementation below.
      VLOG(1) << "Engine " << engine << " is not available for operator "
              << op_type << ".";
    }
  }
  if (operator_def.engine().size() && !VLOG_IS_ON(1)) {
    static int log_occurrences = 0;
    if (log_occurrences <= 64) {
      ++log_occurrences;
      LOG(INFO) << "Engine " << operator_def.engine()
                << " is not available for operator " << op_type << ".";
    }
  }
  VLOG(1) << "Using default implementation.";

  // Lastly, if no engine worked, try the default implementation.
  auto op = TryCreateOperator(op_type, operator_def, ws);
  CAFFE_ENFORCE(
      op,
      "Cannot create operator of type '",
      op_type,
      "' on the device '",
      DeviceTypeName(device_type),
      "'. Verify that implementation for the corresponding device exist. It "
      "might also happen if the binary is not linked with the operator "
      "implementation code. If Python frontend is used it might happen if "
      "dyndep.InitOpsLibrary call is missing. Operator def: ",
      ProtoDebugString(operator_def));
  return op;
}

} // namespace
const std::string OpRegistryKey(
    const std::string& op_type,
    const std::string& engine) {
  if (engine == "" || engine == "DEFAULT") {
    return op_type;
  } else {
    return op_type + "_ENGINE_" + engine;
  }
}
void SetPerOpEnginePref(const PerOpEnginePrefType& per_op_engine_pref) {
  for (const auto& device_pref_pair : per_op_engine_pref) {
    const auto& device_type = device_pref_pair.first;
    CAFFE_ENFORCE(
        gDeviceTypeRegistry()->count(device_type),
        "Device type ",
        device_type,
        " not registered.");
    auto* registry = gDeviceTypeRegistry()->at(device_type);

    for (const auto& op_pref_pair : device_pref_pair.second) {
      const auto& op_type = op_pref_pair.first;
      CAFFE_ENFORCE(
          registry->Has(op_type),
          "Operator type ",
          op_type,
          " not registered in ",
          device_type,
          " registry.");
    }
  }
  g_per_op_engine_pref() = per_op_engine_pref;
}
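// Usage sketch (illustrative; assumes "Conv" is registered for CUDA):
//
//   SetPerOpEnginePref({{CUDA, {{"Conv", {"CUDNN", "DEFAULT"}}}}});
//
// Subsequent CreateOperator calls for CUDA Conv ops will then try these
// engines in order before falling back to the default implementation.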
void SetGlobalEnginePref(const GlobalEnginePrefType& global_engine_pref) {
  for (const auto& device_pref_pair : global_engine_pref) {
    const auto& device_type = device_pref_pair.first;
    CAFFE_ENFORCE(
        gDeviceTypeRegistry()->count(device_type),
        "Device type ",
        device_type,
        " not registered.");
  }
  g_global_engine_pref() = global_engine_pref;
}

void SetEnginePref(
    const PerOpEnginePrefType& per_op_engine_pref,
    const GlobalEnginePrefType& global_engine_pref) {
  SetPerOpEnginePref(per_op_engine_pref);
  SetGlobalEnginePref(global_engine_pref);
}
void SetOpEnginePref(
    const std::string& op_type,
    const CaffeMap<DeviceType, EnginePrefType>& op_pref) {
  for (const auto& device_pref_pair : op_pref) {
    const auto& device_type_proto = device_pref_pair.first;
    const auto& device_type =
        ProtoToType(static_cast<DeviceTypeProto>(device_type_proto));
    CAFFE_ENFORCE(
        gDeviceTypeRegistry()->count(device_type),
        "Device type ",
        device_type,
        " not registered.");
    CAFFE_ENFORCE(
        gDeviceTypeRegistry()->at(device_type)->Has(op_type),
        "Operator type ",
        op_type,
        " not registered in ",
        device_type,
        " registry.");
    g_per_op_engine_pref()[device_type][op_type] = device_pref_pair.second;
  }
}
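// Usage sketch (illustrative): prefer CUDNN for a single operator type only,
// leaving all other preferences untouched:
//
//   SetOpEnginePref("Conv", {{CUDA, {"CUDNN"}}});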
unique_ptr<OperatorBase> CreateOperator(
    const OperatorDef& operator_def,
    Workspace* ws,
    int net_position) {
  try {
    auto op = _CreateOperator(operator_def, ws);
    op->set_net_position(net_position);
    return op;
  } catch (...) {
    if (net_position != 0) {
      VLOG(1) << "Operator constructor with net position " << net_position
              << " failed";
      ws->last_failed_op_net_position = net_position;
    } else {
      VLOG(1) << "Failed operator constructor doesn't have an id set";
    }
    throw;
  }
}
std::map<DeviceType, OperatorRegistry*>* gDeviceTypeRegistry() {
  static std::map<DeviceType, OperatorRegistry*> g_device_type_registry;
  return &g_device_type_registry;
}
C10_DEFINE_REGISTRY(
    CPUOperatorRegistry,
    OperatorBase,
    const OperatorDef&,
    Workspace*);
CAFFE_REGISTER_DEVICE_TYPE(CPU, CPUOperatorRegistry);

C10_DEFINE_REGISTRY(
    CUDAOperatorRegistry,
    OperatorBase,
    const OperatorDef&,
    Workspace*);
CAFFE_REGISTER_DEVICE_TYPE(CUDA, CUDAOperatorRegistry);

C10_DEFINE_REGISTRY(
    HIPOperatorRegistry,
    OperatorBase,
    const OperatorDef&,
    Workspace*);
CAFFE_REGISTER_DEVICE_TYPE(HIP, HIPOperatorRegistry);

C10_DEFINE_REGISTRY(
    GradientRegistry,
    GradientMakerBase,
    const OperatorDef&,
    const vector<GradientWrapper>&);
GradientOpsMeta GetGradientForOp(
    const OperatorDef& def,
    const vector<GradientWrapper>& g_output) {
  std::unique_ptr<GradientMakerBase> maker(
      GradientRegistry()->Create(def.type(), def, g_output));
  CAFFE_ENFORCE(
      maker, "Gradient maker for operator ", def.type(), " not implemented.");
  GradientOpsMeta meta = maker->Get();
  // Copy device option, engine, and arguments if needed.
  if (maker->CopyDeviceOption() && def.has_device_option()) {
    for (OperatorDef& grad_def : meta.ops_) {
      grad_def.mutable_device_option()->CopyFrom(def.device_option());
    }
  }
  if (maker->CopyEngine() && def.has_engine()) {
    for (OperatorDef& grad_def : meta.ops_) {
      grad_def.set_engine(def.engine());
    }
  }
  if (maker->CopyArguments() && def.arg_size()) {
    for (OperatorDef& grad_def : meta.ops_) {
      for (auto& arg : def.arg()) {
        grad_def.add_arg()->CopyFrom(arg);
      }
    }
  }
  // VLOG for debugging purposes.
  for (const OperatorDef& grad_def : meta.ops_) {
    VLOG(1) << "Gradient ops: " << ProtoDebugString(grad_def);
  }
  // Check that the gradient computation returned the right number of input
  // gradients.
  CAFFE_ENFORCE_EQ(meta.g_input_.size(), def.input_size());
  VLOG(1) << "Gradients:";
  for (const GradientWrapper& grad : meta.g_input_) {
    // The gradient should either be (1) not set, (2) dense, or (3) sparse,
    // but never both dense and sparse.
    if (!grad.IsDense() && !grad.IsSparse()) {
      VLOG(1) << "\t [no gradient]";
    } else if (grad.IsDense()) {
      VLOG(1) << "\t [dense]" << grad.dense_;
    } else {
      CAFFE_ENFORCE(
          grad.indices_.size() && grad.values_.size(),
          "For sparse gradient, one should set both indices and values. "
          "Currently we have: (" +
              grad.indices_ + ", " + grad.values_ + ").");
      VLOG(1) << "\t [sparse] " << grad.indices_ << ", " << grad.values_;
    }
  }
  return meta;
}
TensorShapes InferBlobShapesAndTypes(
    CaffeMap<string, TensorShape>& blob_desc,
    const vector<NetDef*>& nets) {
  for (auto& defptr : nets) {
    // Hack to work with auto split gradients
    CaffeMap<string, string> unmatched_sum_blobs;
    CaffeMap<string, TensorShape> reshape_cache;

    for (const OperatorDef& op : defptr->op()) {
      // Hack to ignore queues
      if (op.type().find("Dequeue") != std::string::npos ||
          op.type().find("Enqueue") != std::string::npos) {
        continue;
      }

      vector<TensorShape> input_desc;
      bool found_all = true;
      for (const string& in : op.input()) {
        auto inp_desc = blob_desc.find(in);
        if (inp_desc == blob_desc.end()) {
          LOG(WARNING) << "Shape and type inference failed for input: " << in
                       << " for op " << op.type() << ", skipping.";
          found_all = false;
          break;
        }
        input_desc.push_back(inp_desc->second);
      }
      if (!found_all) {
        continue;
      }
      auto op_schema = OpSchemaRegistry::Schema(op.type());
      if (op_schema == nullptr) {
        LOG(WARNING) << "Shape inference failed, no schema for: " << op.type();
        continue;
      }

      // Special handling for Sum, as it is used with the autosplits, which
      // have a different naming convention. Assuming that all Sum inputs must
      // be of the same size, we can infer their shapes.
      if (op.type() == "Sum") {
        TensorShape sum_shape;
        for (auto inp : op.input()) {
          auto it = blob_desc.find(inp);
          if (it != blob_desc.end() && !it->second.unknown_shape()) {
            if (it->second.dims_size() > 0) {
              sum_shape = blob_desc[inp];
              break;
            }
          }
        }
        for (auto inp : op.input()) {
          auto it = blob_desc.find(inp);
          if (it == blob_desc.end() || it->second.unknown_shape()) {
            blob_desc[inp] = sum_shape;
            if (sum_shape.dims_size() == 0) {
              // Match later with the output
              unmatched_sum_blobs[inp] = op.output(0);
            }
          }
        }
      }

      if (op.type() == "Reshape" && op.is_gradient_op()) {
        CAFFE_ENFORCE(reshape_cache.find(op.input(1)) != reshape_cache.end());
        TensorShape cached = reshape_cache[op.input(1)];
        blob_desc[op.output(0)] = cached;
        continue;
      }

      std::vector<TensorShape> out;
      try {
        out = op_schema->InferTensor(op, input_desc);
        if (op.is_gradient_op() && out.size()) {
          // Special handling for gradient ops. We can assume gradients are of
          // the same size as the corresponding variables. This is a bit ugly
          // to base on string matching, but we don't have the connection
          // between a variable and its gradient specified.
          CaffeMap<string, string> grads_to_params =
              GradientMakerBase::MatchGradsToParams(op);

          for (size_t i = 0; i < out.size(); i++) {
            if (out[i].unknown_shape()) {
              std::string gradout = op.output(i);

              if (grads_to_params.find(gradout) != grads_to_params.end()) {
                std::string var = grads_to_params[gradout];
                if (blob_desc.find(var) != blob_desc.end()) {
                  out[i] = blob_desc[var];
                }
              }
            }
          }
        }

        if (op.type() == "Reshape") {
          // Reshape stores the original input shape in its second output
          // blob; we need this for the gradient reshape.
          reshape_cache[op.output(1)] = input_desc[0];
        }
      } catch (::caffe2::EnforceNotMet& enf) {
        LOG(ERROR) << "Shape inference error: " << enf.msg();
        LOG(ERROR) << "Operator: " << ProtoDebugString(op) << std::endl;
        LOG(ERROR) << "Returning empty results.";
        TensorShapes tps;
        return tps;
      }

      if (out.size() != (unsigned)op.output_size()) {
        if (op.type() == "Slice") {
          CAFFE_ENFORCE(
              out.size() == 0,
              "For Slice operator, either shape of all output blobs are "
              "inferred or shape of none can be inferred.");
        } else {
          CAFFE_THROW(
              "Invalid shape inference for operator ",
              op.type(),
              " Expected ",
              op.output_size(),
              " outputs, but got ",
              out.size());
        }
      } else {
        for (size_t i = 0; i < out.size(); i++) {
          blob_desc[op.output(i)] = out[i];
        }
      }
    } // defptr->op()

    for (auto& unmatched : unmatched_sum_blobs) {
      if (blob_desc.find(unmatched.second) != blob_desc.end()) {
        blob_desc[unmatched.first] = blob_desc[unmatched.second];
      }
    }
  } // nets

  TensorShapes tps;
  for (auto kv : blob_desc) {
    TensorShape& tp = kv.second;
    TensorShape* tpnew = tps.add_shapes();
    tpnew->CopyFrom(tp);
    tpnew->set_name(kv.first);
  }
  return tps;
}
TensorShape GetTensorShapeOfBlob(const Blob* b) {
  TypeCall type_fun = GetTypeCallFunction(b->meta().id());
  TensorInfoCall tensor_info_fun = GetTensorInfoFunction(b->meta().id());
  TensorShape tp;

  if (type_fun) {
    tp.set_data_type(TypeMetaToDataType(type_fun(b->GetRaw())));
  }
  if (tensor_info_fun) {
    size_t _capacity;
    DeviceOption _device;
    auto shape = tensor_info_fun(b->GetRaw(), &_capacity, &_device);
    for (auto d : shape) {
      tp.add_dims(d);
    }
  } else {
    tp.set_unknown_shape(true);
  }
  return tp;
}
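// Behavior sketch: for a blob holding a tensor of shape {4, 2}, the returned
// TensorShape has dims [4, 2] plus the tensor's data type; for a blob type
// with no registered TensorInfoCall, unknown_shape is set instead.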
TensorShapes InferBlobShapesAndTypesFromWorkspace(
    Workspace* ws,
    const vector<NetDef*>& nets) {
  CaffeMap<string, TensorShape> blob_desc;
  // Populate shapes from the workspace
  const std::vector<string>& ws_blobs = ws->Blobs();
  for (const auto& s : ws_blobs) {
    const Blob* b = ws->GetBlob(s);
    TensorShape tp = GetTensorShapeOfBlob(b);
    blob_desc[s] = tp;
  }
  return InferBlobShapesAndTypes(blob_desc, nets);
}
TensorShapes InferBlobShapesAndTypesFromMap(
    const CaffeMap<std::string, std::vector<int64_t>>& blob_dimensions,
    const vector<NetDef*>& nets) {
  CaffeMap<string, TensorShape> blob_desc;
  // Populate shapes from the given map of known blob dimensions
  for (const auto& blob : blob_dimensions) {
    TensorShape tp;
    for (auto d : blob.second) {
      CAFFE_ENFORCE_GE(d, 0, blob.first);
      tp.add_dims(d);
    }
    blob_desc[blob.first] = tp;
  }
  return InferBlobShapesAndTypes(blob_desc, nets);
}
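// Usage sketch (illustrative): seed inference with an external input shape
// and run it over a net:
//
//   CaffeMap<std::string, std::vector<int64_t>> dims{
//       {"data", {1, 3, 224, 224}}};
//   TensorShapes shapes = InferBlobShapesAndTypesFromMap(dims, {&net_def});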
TensorShapes InferBlobShapesAndTypesFromMap(
    const CaffeMap<std::string, std::vector<int64_t>>& blob_dimensions,
    const CaffeMap<std::string, TensorProto_DataType>& blob_types,
    const vector<NetDef*>& nets) {
  CaffeMap<string, TensorShape> blob_desc;
  // Populate shapes from the given map of known blob dimensions and types
  for (const auto& blob : blob_dimensions) {
    TensorShape tp;
    for (auto d : blob.second) {
      CAFFE_ENFORCE_GE(d, 0, blob.first);
      tp.add_dims(d);
    }
    auto blob_type = blob_types.find(blob.first);
    if (blob_type == blob_types.end()) {
      LOG(WARNING) << "Missing type of " << blob.first
                   << "; assuming to be UNDEFINED";
      tp.set_data_type(TensorProto_DataType_UNDEFINED);
    } else {
      tp.set_data_type(blob_type->second);
    }
    blob_desc[blob.first] = tp;
  }
  return InferBlobShapesAndTypes(blob_desc, nets);
}
std::map<string, std::pair<DeviceOption, DeviceOption>> ValidateTensorDevices(
    OperatorBase& op,
    const OperatorDef& op_def) {
  std::map<string, std::pair<DeviceOption, DeviceOption>> mismatches;
  DeviceOption op_device = op_def.device_option();

#ifndef CAFFE2_NO_OPERATOR_SCHEMA
  // Check from the op schema whether this op is allowed to cross devices.
  auto op_schema = OpSchemaRegistry::Schema(op_def.type());
  if (op_schema != nullptr) {
    if (op_schema->inputs_can_cross_devices()) {
      return mismatches;
    }
  }
#endif // CAFFE2_NO_OPERATOR_SCHEMA

  auto Check = [&](const Blob& blob, std::string blob_name) {
    TensorInfoCall tensor_info_fun = GetTensorInfoFunction(blob.meta().id());
    if (tensor_info_fun) {
      size_t _capacity;
      DeviceOption blob_device;
      tensor_info_fun(
          const_cast<Blob&>(blob).GetRaw(), &_capacity, &blob_device);

      if ((blob_device.device_type() == PROTO_CUDA ||
           blob_device.device_type() == PROTO_HIP) &&
          blob_device.device_id() != op_device.device_id()) {
        mismatches[blob_name] = std::make_pair(op_device, blob_device);
      }
    }
  };

  // Check that inputs and outputs are on the same device as the op itself.
  for (int i = 0; i < op.InputSize(); i++) {
    Check(op.InputBlob(i), op_def.input(i));
  }
  for (int i = 0; i < op.OutputSize(); i++) {
    Check(*op.OutputBlob(i), op_def.output(i));
  }
  return mismatches;
}
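// Usage sketch (illustrative): surface device mismatches after constructing
// an operator:
//
//   auto mismatches = ValidateTensorDevices(*op, def);
//   for (const auto& kv : mismatches) {
//     LOG(WARNING) << "Blob " << kv.first
//                  << " lives on a different GPU than its operator.";
//   }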
std::set<std::string> GetRegisteredOperators() {
  std::set<std::string> all_keys;

  // CPU operators
  for (const auto& name : CPUOperatorRegistry()->Keys()) {
    all_keys.emplace(name);
  }
  // CUDA operators
  for (const auto& name : CUDAOperatorRegistry()->Keys()) {
    all_keys.emplace(name);
  }
  // HIP operators
  for (const auto& name : HIPOperatorRegistry()->Keys()) {
    all_keys.emplace(name);
  }

  return all_keys;
}
static std::function<void(const OperatorDef&)> OperatorLogger =
    [](const OperatorDef&) { return; };

void SetOperatorLogger(std::function<void(const OperatorDef&)> tracer) {
  OperatorLogger = tracer;
}
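// Usage sketch (illustrative): install a tracer that logs every operator
// passed through OperatorBase's constructor:
//
//   SetOperatorLogger([](const OperatorDef& def) {
//     LOG(INFO) << "Constructing operator: " << def.type();
//   });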
std::function<void(const OperatorDef&)> GetOperatorLogger() {
  return OperatorLogger;
}

} // namespace caffe2