Caffe2 - C++ API
A deep learning, cross platform ML framework
rewrite_net.cc
1 
18 #include "rewrite_net.h"
19 #include "caffe2/core/operator.h"
20 #include "caffe2/utils/proto_utils.h"
21 #include <unordered_map>
22 #include <unordered_set>
23 
24 #ifdef CAFFE2_ANDROID
25 #include "../android/AndroidGLContext.h"
26 #endif
27 
28 namespace caffe2 {
29 
/// Result of one forward pass over a NetDef in which every blob name is
/// given SSA-style version numbers (see analyzeNet).
struct Analysis {
  /// Blob versions seen by a single operator.
  struct SSA {
    /// Maps a blob name to a version number.
    using BlobVersions = std::unordered_map<std::string, size_t>;
    /// Version of each input blob at the time the operator reads it.
    BlobVersions inVersions;
    /// Version of each output blob after the operator has written it.
    BlobVersions outVersions;
  };

  /// Indices of operators within the analyzed net.
  using OpIndices = std::vector<size_t>;
  /// Maps a blob version to the operators that read that version.
  using VersionUsages = std::unordered_map<size_t, OpIndices>;

  /// One entry per operator, in the order the operators appear in the net.
  std::vector<SSA> ssa;
  /// blob name -> blob version -> indices of the operators consuming it.
  std::unordered_map<std::string, VersionUsages> inUsages;
};
39 
40 static Analysis analyzeNet(const NetDef& net) {
41  Analysis::SSA::BlobVersions frontier;
42  Analysis analysis;
43 
44  auto play = [&](size_t i, const OperatorDef& op) {
45  Analysis::SSA::BlobVersions inVersions;
46  for (const auto& s : op.input()) {
47  inVersions[s] = frontier[s];
48  analysis.inUsages[s][frontier[s]].push_back(i);
49  }
50  Analysis::SSA::BlobVersions outVersions;
51  for (const auto& s : op.output()) {
52  if (frontier.find(s) != frontier.end()) {
53  frontier[s] += 1;
54  }
55  outVersions[s] = frontier[s];
56  }
57  analysis.ssa.push_back(Analysis::SSA{inVersions, outVersions});
58  };
59 
60  for (auto i = 0; i < net.op_size(); ++i) {
61  play(i, net.op(i));
62  }
63  return analysis;
64 }
65 
66 static void insertCopyToGPUOp(NetDef& predictNet, const std::string& cpu_blob) {
67  auto* op = predictNet.add_op();
68  op->set_name("CopyToOpenGL");
69  op->set_type("CopyToOpenGL");
70  op->add_input(cpu_blob);
71  op->add_output(cpu_blob + "_M");
72 }
73 
74 static void insertCopyFromGPUOp(NetDef& predictNet, const std::string& cpu_blob) {
75  // add argument "is_last" to the last op to signal this is the last operator before the
76  // CopyFromOpenGL op
77  auto* last_op = predictNet.mutable_op(predictNet.op_size() - 1);
78  auto* arg = last_op->add_arg();
79  arg->set_name("is_last");
80  arg->set_i(1);
81 
82  auto* op = predictNet.add_op();
83  op->set_name("CopyFromOpenGL");
84  op->set_type("CopyFromOpenGL");
85  op->add_input(cpu_blob + "_M");
86  op->add_output(cpu_blob);
87 }
88 
89 static NetDef insertInputOutputCopyOps(const NetDef& def, std::unordered_set<std::string>& glOps) {
90  // Do some validation of the outputs. For this version, we require:
91  // - a single input (first element of external_input()) is consumed by the NetDef
92  // - a single output (first element of external_output()) is produced by the NetDef.
93  // - the input is consumed by def.op(0), and this is the only consumer.
94  // - the output is produced by def.op(-1).
95  CAFFE_ENFORCE_GE(def.external_input_size(), 1);
96  CAFFE_ENFORCE_GE(def.external_output_size(), 1);
97  auto analysis = analyzeNet(def);
98  // enforce a single use of the input blob.
99  CAFFE_ENFORCE_GE(def.op_size(), 1);
100 
101  const auto& inputBlob = def.external_input(0);
102  // Enforce that the input blob has a single usage - in the first operator.
103  CAFFE_ENFORCE(analysis.inUsages[inputBlob][0] == (std::vector<size_t>{0}));
104  // Enforce that the external_output(0) blob is produced by the last operator in this sequence.
105  const auto& outputBlob = def.external_output(0);
106  CAFFE_ENFORCE(analysis.ssa.back().outVersions.find(outputBlob) !=
107  analysis.ssa.back().outVersions.end());
108  const auto& outputBlobVersion = analysis.ssa.back().outVersions[outputBlob];
109  // This should hold true by definition of the SSA analysis.
110  CAFFE_ENFORCE(analysis.inUsages[outputBlob].find(outputBlobVersion) ==
111  analysis.inUsages[outputBlob].end());
112 
113  NetDef mdef;
114  mdef.CopyFrom(def);
115  mdef.clear_op();
116 
117  std::unordered_map<std::string, std::set<size_t>> cpu_blobs, gpu_blobs;
118  cpu_blobs[def.external_input(0)].insert(0);
119 
120  for (auto i = 0; i < def.op_size(); i++) {
121  const auto& currentOp = def.op(i);
122  if (glOps.count(currentOp.type()) > 0) {
123  // OpenGL Op
124  // insert copyToOpenGLOp
125  for (auto j = 0; j < currentOp.input_size(); j++) {
126  auto& input = currentOp.input(j);
127  auto version = analysis.ssa[i].inVersions[input];
128  if (cpu_blobs[input].count(version) > 0) {
129  insertCopyToGPUOp(mdef, input);
130  gpu_blobs[input].insert(version);
131  cpu_blobs[input].erase(version);
132  }
133  // Only the first input should be OpenGL texture
134  // Otherwise, copyToOpenGLOp will be inserted for the weights,
135  // which are outputs of QuantDecode
136  if (currentOp.type().find("OpenGLConv") == 0) {
137  if (j == 0) {
138  break;
139  }
140  }
141  }
142 
143  auto* op = mdef.add_op();
144  op->CopyFrom(currentOp);
145 
146  // swap input blob
147  for (auto j = 0; j < currentOp.input_size(); j++) {
148  auto& input = currentOp.input(j);
149  auto version = analysis.ssa[i].inVersions[input];
150  if (gpu_blobs[input].count(version) > 0) {
151  op->set_input(j, input + "_M");
152  }
153  }
154 
155  // swap output blob
156  for (auto j = 0; j < currentOp.output_size(); j++) {
157  auto& output = currentOp.output(j);
158  auto version = analysis.ssa[i].outVersions[output];
159  op->set_output(j, output + "_M");
160  gpu_blobs[output].insert(version);
161  }
162  // insert copyFromOpenGLOp after the last op if the last op is an OpenGL op
163  if (i == def.op_size() - 1) {
164  insertCopyFromGPUOp(mdef, currentOp.output(0));
165  }
166  } else {
167  // CPU Op
168  // insert copyFromOpenGLOp
169  for (auto j = 0; j < currentOp.input_size(); j++) {
170  auto& input = currentOp.input(j);
171  auto version = analysis.ssa[i].inVersions[input];
172  if (gpu_blobs[input].count(version) > 0) {
173  insertCopyFromGPUOp(mdef, input);
174  }
175  }
176  auto* op = mdef.add_op();
177  op->CopyFrom(currentOp);
178  for (auto j = 0; j < currentOp.output_size(); j++) {
179  auto& output = currentOp.output(j);
180  auto version = analysis.ssa[i].outVersions[output];
181  cpu_blobs[output].insert(version);
182  }
183  }
184  }
185  return mdef;
186 }
187 
188 static bool tryFuseAdjacentOps(const OperatorDef& currentOp,
189  const OperatorDef& nextOp,
190  OperatorDef* fusedOp,
191  std::unordered_set<std::string>& glOps) {
192  // Check for possible invalid opportunities.
193  if (currentOp.output_size() != 1 || nextOp.output_size() != 1) {
194  return false;
195  }
196  // The fused op cannot be inplace
197  if (currentOp.output(0) != nextOp.input(0) || currentOp.input(0) == nextOp.output(0)) {
198  return false;
199  }
200 
201  static const std::map<std::pair<std::string, std::string>, std::string> fusionOpportunities = {
202  {{"OpenGLInstanceNorm", "OpenGLPRelu"}, "OpenGLInstanceNormPRelu"},
203  {{"OpenGLConv", "OpenGLPRelu"}, "OpenGLConvPRelu"},
204  {{"OpenGLConv", "OpenGLRelu"}, "OpenGLConvRelu"},
205  {{"OpenGLConvTranspose", "OpenGLPRelu"}, "OpenGLConvTransposePRelu"}};
206  auto it = fusionOpportunities.find({currentOp.type(), nextOp.type()});
207  if (it == fusionOpportunities.end()) {
208  return false;
209  }
210 
211  glOps.insert(it->second);
212  fusedOp->CopyFrom(currentOp);
213  fusedOp->set_output(0, nextOp.output(0));
214  fusedOp->set_type(it->second);
215  for (auto i = 1; i < nextOp.input_size(); i++) {
216  fusedOp->add_input(nextOp.input(i));
217  }
218  return true;
219 }
220 
221 static NetDef runOpenGLFusion(const NetDef& def, std::unordered_set<std::string>& glOps) {
222  CHECK_GE(def.op_size(), 1);
223  NetDef mdef;
224  mdef.CopyFrom(def);
225  mdef.clear_op();
226  auto i = 0;
227 
228  while (i < def.op_size()) {
229  if (i == def.op_size() - 1) {
230  VLOG(2) << "Last operator, skipping";
231  auto* op = mdef.add_op();
232  op->CopyFrom(def.op(i));
233  i += 1;
234  continue;
235  }
236 
237  const auto& currentOp = def.op(i);
238  const auto& nextOp = def.op(i + 1);
239  OperatorDef fusedOp;
240  if (tryFuseAdjacentOps(currentOp, nextOp, &fusedOp, glOps)) {
241  VLOG(2) << "Found an adjacent fusion for: " << currentOp.type() << ", " << nextOp.type();
242  // We can fuse.
243  auto* op = mdef.add_op();
244  op->CopyFrom(fusedOp);
245  i += 2;
246  continue;
247  }
248  VLOG(2) << "No fusion available for: " << currentOp.type() << ", " << nextOp.type();
249  // Just emit the current type.
250  auto* op = mdef.add_op();
251  op->CopyFrom(currentOp);
252  i += 1;
253  }
254  return mdef;
255 }
256 
257 void dumpDefForOpenGL(const NetDef& d) {
258  for (const auto& op : d.op()) {
259  LOG(INFO) << op.input(0) << " -> " << op.type() << " -> " << op.output(0);
260  }
261 }
262 
263 // // For debugging
264 // void dumpDefForOpenGL(const NetDef &net) {
265 // for (const auto &op : net.op()) {
266 // printf("***Operator: %s\n", op.type().c_str());
267 // for (auto input : op.input()) {
268 // printf("\tInput: %s\n", input.c_str());
269 // }
270 //
271 // for (auto output : op.output()) {
272 // printf("\tOutput: %s\n", output.c_str());
273 // }
274 // }
275 //}
276 
277 NetDef rewritePredictNetForOpenGL(const NetDef& predictNet, bool useTextureInput, bool useTiling, bool runFusion) {
278  CAFFE_ENFORCE_GE(predictNet.op_size(), 1);
279  NetDef net;
280  net.CopyFrom(predictNet);
281 
282  std::unordered_map<std::string, std::string> replacements(
283  {{"OpenGLPackedInt8BGRANHWCToNCHWCStylizerPreprocess",
284  useTextureInput ? "OpenGLTextureToTextureStylizerPreprocess"
285  : "OpenGLTensorToTextureStylizerPreprocess"},
286  {"OpenGLBRGNCHWCToPackedInt8BGRAStylizerDeprocess",
287  useTextureInput ? "OpenGLTextureToTextureStylizerDeprocess"
288  : "OpenGLTextureToTensorStylizerDeprocess"}});
289 
290  std::unordered_set<std::string> openGLOps; // Used to insert copy ops
291  bool needCopyOps = false;
292 
293  const auto& opKeyList = CPUOperatorRegistry()->Keys();
294  auto opKeySet = std::set<std::string>(opKeyList.begin(), opKeyList.end());
295 
296 #ifdef CAFFE2_ANDROID
297  // TODO: debug InstanceNorm models on Mali devices
298  AndroidGLContext* context = (AndroidGLContext*)GLContext::getGLContext();
299  if (context->get_platform() == Mali) {
300  opKeySet.erase("OpenGLInstanceNorm");
301  opKeySet.erase("OpenGLInstanceNormPRelu");
302  }
303 #endif
304  for (auto i = 0; i < net.op_size(); ++i) {
305  auto* op = net.mutable_op(i);
306  string openGLOp = std::string("OpenGL") + op->type();
307  if (replacements.count(openGLOp) > 0) {
308  openGLOp = replacements[openGLOp];
309  }
310 
311  if (opKeySet.find(openGLOp) != opKeySet.end()) {
312  op->set_type(openGLOp);
313  openGLOps.insert(openGLOp);
314 
315  if (useTiling) {
316  auto* arg = op->add_arg();
317  arg->set_name("tiling");
318  arg->set_i(1);
319  }
320  } else {
321  needCopyOps = true;
322  }
323  }
324 
325  if (useTextureInput && needCopyOps) {
326  CAFFE_THROW("OpenGL operator missing");
327  }
328 
329  if (runFusion) {
330  net = runOpenGLFusion(net, openGLOps);
331  }
332 
333  if (net.op(0).type() == replacements["OpenGLPackedInt8BGRANHWCToNCHWCStylizerPreprocess"]) {
334  // For end-to-end testing
335  if (net.op(net.op_size() - 1).type() !=
336  replacements["OpenGLBRGNCHWCToPackedInt8BGRAStylizerDeprocess"]) {
337  auto* last_op = net.mutable_op(net.op_size() - 1);
338  auto output = last_op->output(0) + "M";
339  last_op->set_output(0, output);
340  auto* copy_op = net.add_op();
341  copy_op->set_name("CopyFromOpenGL");
342  copy_op->set_type("CopyFromOpenGL");
343  copy_op->add_input(output);
344  // rename output blob in case input and output blob has the same name
345  copy_op->add_output(net.external_output(0));
346  }
347  } else {
348  if (!useTextureInput) {
349  needCopyOps = true;
350  }
351  }
352 
353  // copy ops are needed when the input is not a texture
354  if (needCopyOps) {
355  // For non style transfer cases
356  net = insertInputOutputCopyOps(net, openGLOps);
357  }
358 
359  return net;
360 }
361 
362 bool tryConvertToOpenGL(const NetDef& initNet,
363  const NetDef& predictNet,
364  NetDef* glPredictNet,
365  bool useTextureInput,
366  bool useTiling,
367  bool runFusion) {
368  try {
369  // Throws if unsupported operators are found.
370  *glPredictNet = rewritePredictNetForOpenGL(predictNet, useTextureInput, useTiling, runFusion);
371  dumpDefForOpenGL(*glPredictNet);
372  // Throws if unsupported parameters are found.
373  Workspace ws;
374  ws.RunNetOnce(initNet);
375  ws.CreateNet(*glPredictNet);
376  LOG(INFO) << "OpenGL is successfully enabled";
377  return true;
378  } catch (const std::exception& e) {
379  LOG(ERROR) << "Caught exception trying to convert NetDef to OpenGL: " << e.what();
380  return false;
381  }
382 }
383 } // namespace caffe2
Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
Definition: workspace.h:63
Copyright (c) 2016-present, Facebook, Inc.
NetBase * CreateNet(const NetDef &net_def, bool overwrite=false)
Creates a network with the given NetDef, and returns the pointer to the network.
Definition: workspace.cc:234