Caffe2 - C++ API
A deep learning, cross platform ML framework
backend_cutting.cc
1 #include "caffe2/opt/backend_cutting.h"
2 #include "caffe2/core/logging.h"
3 #include "caffe2/opt/converter.h"
4 #include "nomnigraph/Converters/Dot.h"
5 #include "nomnigraph/Representations/NeuralNet.h"
6 
7 #include <algorithm>
8 #include <fstream>
9 #include <queue>
10 
11 namespace caffe2 {
12 namespace opt {
13 
14 namespace {
15 
16 using namespace nom::repr;
17 using NodeRef = NNGraph::NodeRef;
18 using EdgeRef = NNGraph::EdgeRef;
19 
// Per-node bookkeeping used while the graph is being cut into groups.
struct GroupAnnotation {
  // `i` is the node's initial in-degree and `g` its group id. The constructor
  // is explicit so that a plain int never silently converts into an
  // annotation.
  explicit GroupAnnotation(int i, int g = -1) : group(g), in_degree(i) {}
  // Id of the group the node belongs to; stays at the constructor value (-1 by
  // default) until Explore() overwrites it.
  int group;
  // Number of incoming edges not yet consumed by Explore().
  int in_degree;
  // Overwritten by Explore() with the `find_supported` flag of the pass that
  // collected the node.
  bool needs_transform{true};
};
26 
27 std::string ShowNode(NodeRef node) {
28  if (nn::is<NeuralNetData>(node)) {
29  const auto* nn_tensor = nn::get<NeuralNetData>(node);
30  return c10::str("Tensor: ", nn_tensor->getName());
31  } else if (nn::is<NeuralNetOperator>(node)) {
32  const auto* nn_op = nn::get<NeuralNetOperator>(node);
33  const auto& op_def =
34  dyn_cast<Caffe2Annotation>(nn_op->getAnnotation())->getOperatorDef();
35  return c10::str("Op: ", op_def.type());
36  } else {
37  CAFFE_THROW("Known node");
38  }
39 }
40 
41 void DumpGraph(NNGraph* g) {
42  auto nnprinter = [](typename NNGraph::NodeRef node) {
43  std::map<std::string, std::string> labelMap;
44  assert(node->data() && "Node doesn't have data, can't render it");
45  if (isa<NeuralNetOperator>(node->data())) {
46  auto* op = dyn_cast<NeuralNetOperator>(node->data().get());
47  labelMap["label"] =
48  op->getName() + " (" + c10::to_string((unsigned long long)node) + ")";
49  auto* annotation = op->getAnnotation();
50  if (annotation && isa<Caffe2Annotation>(annotation)) {
51  auto device_annotation = dyn_cast<Caffe2Annotation>(annotation);
52  labelMap["label"] += "\\n[" + device_annotation->getDevice() + "]";
53  auto hash = std::hash<std::string>{}(device_annotation->getDevice());
54  std::stringstream hex_stream;
55  hex_stream << std::hex << hash;
56  labelMap["color"] = "#" + hex_stream.str().substr(0, 6);
57  labelMap["fontcolor"] = labelMap["color"];
58  }
59  labelMap["shape"] = "box";
60  } else if (isa<Data>(node->data())) {
61  auto tensor = dyn_cast<NeuralNetData>(node->data().get());
62  labelMap["label"] = tensor->getName();
63  labelMap["label"] += "_" + c10::to_string(tensor->getVersion()) + " " +
64  c10::to_string((unsigned long long)node);
65  }
66  return labelMap;
67  };
68 
69  std::ofstream out("dump.dot");
70  out << nom::converters::convertToDotString(g, nnprinter);
71  out.close();
72 }
73 
74 struct VisitorContext {
75  VisitorContext(std::function<bool(const caffe2::OperatorDef&)> func)
76  : predicate(func) {}
77 
78  std::unordered_map<NodeRef, GroupAnnotation> infos;
79  std::unordered_set<NodeRef> frontier;
80  std::vector<NodeRef> current_group;
81  std::function<bool(const caffe2::OperatorDef&)> predicate;
82 
83  int group{0};
84  bool find_supported{true};
85 };
86 
87 GroupAnnotation& GetInfo(
88  std::unordered_map<NodeRef, GroupAnnotation>& infos,
89  NodeRef node) {
90  auto it = infos.find(node);
91  CAFFE_ENFORCE(it != infos.end(), "Node info not found for ", ShowNode(node));
92  return it->second;
93 }
94 
95 const GroupAnnotation& GetInfo(
96  const std::unordered_map<NodeRef, GroupAnnotation>& infos,
97  NodeRef node) {
98  auto it = infos.find(node);
99  CAFFE_ENFORCE(
100  it != infos.end(), "Const node info not found for ", ShowNode(node));
101  return it->second;
102 }
103 
104 // Explore the graph in topological order until we hit stopping nodes. This is
105 // based on Khan's algorithm:
106 // https://en.wikipedia.org/wiki/Topological_sorting#Kahn's_algorithm
107 // Precondition: nodes in `current_frontier` must have satisfy `in_degree == 0`
108 void Explore(
109  const std::vector<NodeRef>& current_frontier,
110  VisitorContext* context) {
111  std::queue<NodeRef> q;
112  for (const auto n : current_frontier) {
113  q.push(n);
114  }
115 
116  while (!q.empty()) {
117  auto node = q.front();
118  q.pop();
119  auto& info = GetInfo(context->infos, node);
120 
121  // Check if the node is supported, stop exploring further if not supported
122  if (nn::is<NeuralNetOperator>(node)) {
123  const auto* nn_op = nn::get<NeuralNetOperator>(node);
124  const auto& op_def =
125  dyn_cast<Caffe2Annotation>(nn_op->getAnnotation())->getOperatorDef();
126  bool wanted = context->predicate(op_def);
127  wanted = context->find_supported ? wanted : (!wanted);
128  if (!wanted) {
129  context->frontier.emplace(node);
130  continue;
131  }
132  }
133 
134  // Adding to current group
135  info.group = context->group;
136  info.needs_transform = context->find_supported;
137  context->current_group.push_back(node);
138 
139  // Continue exploring its fanouts
140  for (const auto& out_edge : node->getOutEdges()) {
141  auto child_node = out_edge->head();
142  auto& child_info = GetInfo(context->infos, child_node);
143  if (--child_info.in_degree == 0) {
144  q.push(child_node);
145  }
146  }
147  }
148 }
149 
150 // Note: subgraph always starts with ops and ends with tensors, except for the
151 // very first group, which can be all tensors
152 struct TransformSubgraph {
153  explicit TransformSubgraph(
154  std::vector<NodeRef>&& f,
155  std::vector<NodeRef>&& n,
156  int id,
157  bool need)
158  : input_nodes(std::move(f)),
159  nodes(std::move(n)),
160  group_id(id),
161  needed(need) {}
162 
163  TransformSubgraph(TransformSubgraph&& rhs) noexcept
164  : input_nodes(std::move(rhs.input_nodes)),
165  nodes(std::move(rhs.nodes)),
166  external_input_refs(std::move(rhs.external_input_refs)),
167  external_output_refs(std::move(rhs.external_output_refs)),
168  group_id(rhs.group_id),
169  needed(rhs.needed) {}
170 
171  TransformSubgraph& operator=(TransformSubgraph&& rhs) noexcept {
172  input_nodes = std::move(rhs.input_nodes);
173  nodes = std::move(rhs.nodes);
174  external_input_refs = std::move(rhs.external_input_refs);
175  external_output_refs = std::move(rhs.external_output_refs);
176  group_id = rhs.group_id;
177  needed = rhs.needed;
178  return *this;
179  }
180 
181  void Print() const {
182  LOG(INFO) << "Group :" << group_id;
183  LOG(INFO) << " Input Nodes: ";
184  for (const auto i : input_nodes) {
185  LOG(INFO) << " " << ShowNode(i);
186  }
187  LOG(INFO) << " Nodes: ";
188  for (const auto i : nodes) {
189  LOG(INFO) << " " << ShowNode(i);
190  }
191  }
192 
193  std::vector<NodeRef> input_nodes;
194  std::vector<NodeRef> nodes;
195  std::unordered_map<std::string, NodeRef> external_input_refs;
196  std::unordered_map<std::string, NodeRef> external_output_refs;
197  int group_id{-1};
198  bool needed{true};
199 };
200 
201 caffe2::NetDef ConvertToC2Net(
202  const TransformSubgraph& sub,
203  const std::unordered_map<NodeRef, GroupAnnotation>& infos) {
204  caffe2::NetDef net;
205  for (auto node : sub.nodes) {
206  if (nn::is<NeuralNetOperator>(node)) {
207  const auto* nn_op = nn::get<NeuralNetOperator>(node);
208  assert(
209  isa<Caffe2Annotation>(nn_op->getAnnotation()) &&
210  "Cannot get caffe2 op from NNOp");
211  const auto& op_def =
212  dyn_cast<Caffe2Annotation>(nn_op->getAnnotation())->getOperatorDef();
213  net.add_op()->CopyFrom(op_def);
214  }
215  }
216  for (const auto kv : sub.external_input_refs) {
217  net.add_external_input(kv.first);
218  VLOG(2) << "Adding external input: " << kv.first;
219  }
220  for (const auto& kv : sub.external_output_refs) {
221  net.add_external_output(kv.first);
222  VLOG(2) << "Adding external output: " << kv.first;
223  }
224 
225  return net;
226 }
227 
228 void DetectBoundaryReferences(
229  TransformSubgraph* subgraph,
230  const std::unordered_map<NodeRef, GroupAnnotation>& infos,
231  const std::unordered_set<std::string>& original_external_output) {
232  for (auto node : subgraph->nodes) {
233  // inputs
234  for (auto in_edge : node->getInEdges()) {
235  auto parent_node = in_edge->tail();
236  const auto& info = GetInfo(infos, parent_node);
237  if (info.group != subgraph->group_id &&
238  nn::is<NeuralNetData>(parent_node)) {
239  const auto* nn_tensor = nn::get<const NeuralNetData>(parent_node);
240  subgraph->external_input_refs.emplace(
241  nn_tensor->getName(), parent_node);
242  }
243  }
244 
245  // outputs
246  if (!nn::is<NeuralNetData>(node)) {
247  continue;
248  }
249  // Note that although matched subgraph won't contain external inputs as we
250  // skip the initial input tensor of matching, it is possible to contain
251  // external outputs. We will mark these external outputs as boundary outputs
252  // too.
253  auto name = nn::get<const NeuralNetData>(node)->getName();
254  if (original_external_output.count(name)) {
255  subgraph->external_output_refs.emplace(name, node);
256  } else {
257  for (auto child_node : nn::getConsumers(node)) {
258  const auto& info = GetInfo(infos, child_node);
259  if (info.group != subgraph->group_id) {
260  subgraph->external_output_refs.emplace(name, node);
261  break;
262  }
263  }
264  }
265  }
266 }
267 
268 void ReplaceSubgraph(
269  const TransformSubgraph& subgraph,
270  caffe2::NetDef& net_opt,
271  NNGraph* g) {
272  // Delete the old subgraph starting from the input nodes until we hit boundary
273  // tensors
274  for (auto node : subgraph.nodes) {
275  if (nn::is<NeuralNetData>(node) &&
276  subgraph.external_output_refs.count(
277  nn::get<const NeuralNetData>(node)->getName())) {
278  VLOG(2) << "Keeping " << ShowNode(node);
279  continue;
280  }
281  VLOG(2) << "Deleting " << ShowNode(node);
282  g->deleteNode(node);
283  }
284 
285  // Convert new NetDef back to NNGraph
286  std::unordered_map<std::string, NodeRef> tensor_map;
287  for (const auto kv : subgraph.external_input_refs) {
288  tensor_map.emplace(kv.first, kv.second);
289  }
290  for (const auto kv : subgraph.external_output_refs) {
291  tensor_map.emplace(kv.first, kv.second);
292  }
293  for (auto& op : *net_opt.mutable_op()) {
294  auto op_node = g->createNode();
295  for (const auto& input : op.input()) {
296  if (!tensor_map.count(input)) {
297  tensor_map[input] =
298  g->createNode(caffe2::make_unique<nom::repr::Tensor>(input));
299  }
300 
301  auto tensor_node = tensor_map[input];
302  g->createEdge(tensor_node, op_node);
303  }
304 
305  for (const auto& output : op.output()) {
306  if (!tensor_map.count(output)) {
307  tensor_map[output] =
308  g->createNode(caffe2::make_unique<nom::repr::Tensor>(output));
309  }
310  auto tensor_node = tensor_map[output];
311  g->createEdge(op_node, tensor_node);
312  }
313 
314  op_node->resetData(convertToNeuralNetOperator(op));
315  }
316 }
317 
318 void PruneUnrefereredNodes(NNModule* nn) {
319  auto& g = nn->dataFlow;
320  std::vector<NodeRef> to_delete;
321  for (auto node : g.getMutableNodes()) {
322  if (!nn::hasProducer(node) && !nn::hasConsumer(node)) {
323  to_delete.push_back(node);
324  }
325  }
326  for (auto i : to_delete) {
327  if (nn::is<NeuralNetData>(i)) {
328  auto name = nn::get<NeuralNetData>(i)->getName();
329  auto it = nn->inputs.find(i);
330  if (it != nn->inputs.end()) {
331  VLOG(2) << "Removing external input " << name;
332  nn->inputs.erase(it);
333  }
334  it = nn->outputs.find(i);
335  if (it != nn->outputs.end()) {
336  VLOG(2) << "Removing external output " << name;
337  nn->outputs.erase(it);
338  }
339  }
340  g.deleteNode(i);
341  }
342 }
343 
344 } // namespace
345 
346 caffe2::NetDef OptimizeForBackend(
347  caffe2::NetDef& net,
348  std::function<bool(const caffe2::OperatorDef&)> supports,
349  std::function<caffe2::NetDef(const caffe2::NetDef&)> transform_func,
350  bool debug) {
351  auto nn = convertToNNModule(net);
352  auto& dfg = nn.dataFlow;
353 
354  // Initialize the group info and figure out the external/input output
355  VisitorContext context(supports);
356  std::vector<NodeRef> external_inputs;
357  std::unordered_set<std::string> external_outputs;
358  for (auto node : dfg.getMutableNodes()) {
359  context.infos.emplace(
360  std::piecewise_construct,
361  std::forward_as_tuple(node),
362  std::forward_as_tuple(node->getInEdges().size(), -1));
363 
364  if (!nn::is<NeuralNetOperator>(node)) {
365  if (!nn::hasProducer(node)) {
366  external_inputs.push_back(node);
367  }
368  if (!nn::hasConsumer(node)) {
369  external_outputs.emplace(nn::get<const NeuralNetData>(node)->getName());
370  }
371  }
372  }
373 
374  // Find unsupported and supported groups of nodes alernatively
375  context.frontier.clear();
376  context.current_group.clear();
377  context.find_supported = false;
378  std::vector<TransformSubgraph> subs;
379  for (std::vector<NodeRef> frontier(
380  external_inputs.begin(), external_inputs.end());
381  !frontier.empty();
382  context.find_supported = !context.find_supported) {
383  Explore(frontier, &context);
384  if (context.find_supported) {
385  subs.emplace_back(
386  std::move(frontier),
387  std::move(context.current_group),
388  context.group,
389  context.find_supported);
390  }
391 
392  frontier.assign(context.frontier.begin(), context.frontier.end());
393  context.frontier.clear();
394  context.current_group.clear();
395  context.group++;
396  }
397 
398  // Transform needed subgraphs one by one
399  std::vector<caffe2::NetDef> opt_subnets;
400  opt_subnets.reserve(subs.size());
401  for (auto& g : subs) {
402  // Generate boundary input/output edges
403  DetectBoundaryReferences(&g, context.infos, external_outputs);
404 
405  caffe2::NetDef subnet = ConvertToC2Net(g, context.infos);
406  // Transform the subgraph protobuf def, note that we can have less external
407  // inputs/outputs but not more
408  opt_subnets.emplace_back(transform_func(subnet));
409 
410  ReplaceSubgraph(g, opt_subnets.back(), &dfg);
411  }
412 
413  // Prune dangling nodes, because after transformation, some weights might be
414  // absorbed
415  PruneUnrefereredNodes(&nn);
416 
417  if (debug) {
418  DumpGraph(&dfg);
419  }
420 
421  auto new_net = convertToCaffe2Proto(nn);
422  new_net.set_name(net.name() + "_opt");
423  return new_net;
424 }
425 
426 } // namespace opt
427 } // namespace caffe2
NodeRef createNode(T &&data)
Creates a node and retains ownership of it.
Definition: Graph.h:240
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Definition: blob.h:13
void deleteNode(NodeRef n)
Deletes a node from the graph.
Definition: Graph.h:460
repr::NNModule convertToNNModule(const caffe2::NetDef &net, bool strict, std::vector< repr::NNGraph::NodeRef > *opNodeVec)
Ingest a caffe2 protobuf model and output an NNModule.
Definition: converter.cc:301
A simple graph implementation.
Definition: Graph.h:29
EdgeRef createEdge(NodeRef tail, NodeRef head, U...data)
Creates a directed edge and retains ownership of it.
Definition: Graph.h:415