Caffe2 - Python API
A deep learning, cross platform ML framework
caffe_translator.py
## @package caffe_translator
# Module caffe2.python.caffe_translator
#!/usr/bin/env python2

import argparse
import copy
import logging
import re
import numpy as np  # noqa

from caffe2.proto import caffe2_pb2, caffe2_legacy_pb2
from caffe.proto import caffe_pb2
from caffe2.python import core, utils, workspace
from google.protobuf import text_format

logging.basicConfig()
log = logging.getLogger("caffe_translator")
log.setLevel(logging.INFO)


def _StateMeetsRule(state, rule):
    """A function that reproduces Caffe's StateMeetsRule functionality."""
    if rule.HasField('phase') and rule.phase != state.phase:
        return False
    if rule.HasField('min_level') and state.level < rule.min_level:
        return False
    if rule.HasField('max_level') and state.level > rule.max_level:
        return False
    curr_stages = set(list(state.stage))
    # all stages in rule.stage should be present, otherwise it's not a match.
    if len(rule.stage) and any([s not in curr_stages for s in rule.stage]):
        return False
    # none of the stages in rule.not_stage should be present, otherwise it's
    # not a match.
    if len(rule.not_stage) and any([s in curr_stages for s in rule.not_stage]):
        return False
    # If none of the mismatches happens, return True.
    return True


def _ShouldInclude(net_state, layer):
    """A function that reproduces Caffe's inclusion and exclusion rule."""
    ret = (len(layer.include) == 0)
    # check exclude rules: if any exclusion is met, we shouldn't include.
    ret &= not any([_StateMeetsRule(net_state, rule) for rule in layer.exclude])
    if len(layer.include):
        # check include rules: if any inclusion is met, we should include.
        ret |= any([_StateMeetsRule(net_state, rule) for rule in layer.include])
    return ret
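
# Example (a hedged sketch, not part of the upstream module): a layer whose
# include rule requires the TRAIN phase is skipped when translating with a
# TEST-phase state:
#
#   state = caffe_pb2.NetState(phase=caffe_pb2.TEST)
#   layer = caffe_pb2.LayerParameter()
#   layer.include.add(phase=caffe_pb2.TRAIN)
#   _ShouldInclude(state, layer)  # -> False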


def _GetLegacyDims(net, net_params, dummy_input, legacy_pad_ops):
    dim_map = {}
    ws = workspace.C.Workspace()
    for param in net_params.protos:
        ws.create_blob(param.name) \
            .feed(utils.Caffe2TensorToNumpyArray(param))
    external_input = net.op[0].input[0]
    ws.create_blob(external_input).feed(dummy_input)
    # Get dimensions with legacy pad
    for i in range(len(net.op)):
        op_def = net.op[i]
        ws._run_operator(op_def.SerializeToString())
        if i in legacy_pad_ops:
            output = op_def.output[0]
            blob_legacy = ws.fetch_blob(output)
            dim_map[i] = blob_legacy.shape
    return dim_map


def _GetLegacyPadArgs(op_def, arg_map):
    pads = {}
    keys = ['pad_l', 'pad_t', 'pad_r', 'pad_b']
    is_pad = 'pad' in arg_map
    if is_pad:
        for k in keys:
            pads[k] = arg_map['pad'].i
    else:
        pads = {x: arg_map[x].i for x in keys}
    return pads


def _AdjustDims(op_def, arg_map, pads, dim1, dim2):
    n1, c1, h1, w1 = dim1
    n2, c2, h2, w2 = dim2
    assert(n1 == n2)
    assert(c1 == c2)
    is_pad = 'pad' in arg_map
    if h1 != h2 or w1 != w2:
        if h1 == h2 + 1:
            pads['pad_b'] += 1
        elif h1 != h2:
            raise Exception("Unexpected dimensions for height:", h1, h2)
        if w1 == w2 + 1:
            pads['pad_r'] += 1
        elif w1 != w2:
            raise Exception("Unexpected dimensions for width:", w1, w2)
        if is_pad:
            op_def.arg.remove(arg_map['pad'])
            args = []
            for name in pads.keys():
                arg = caffe2_pb2.Argument()
                arg.name = name
                arg.i = pads[name]
                args.append(arg)
            op_def.arg.extend(args)
        else:
            for name in pads.keys():
                arg_map[name].i = pads[name]


def _RemoveLegacyPad(net, net_params, input_dims):
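    # Strategy: run the net once with the legacy_pad arguments intact
    # (_GetLegacyDims), then re-run each affected op with legacy_pad stripped
    # and grow pad_b / pad_r until the two output shapes agree (_AdjustDims).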
    legacy_pad_ops = []
    for i in range(len(net.op)):
        op_def = net.op[i]
        if re.match(r'^(Conv|ConvTranspose|MaxPool|AveragePool)(\dD)?$',
                    op_def.type):
            for arg in op_def.arg:
                if arg.name == 'legacy_pad':
                    legacy_pad_ops.append(i)
                    break
    if legacy_pad_ops:
        n, c, h, w = input_dims
        dummy_input = np.random.randn(n, c, h, w).astype(np.float32)
        dim_map = _GetLegacyDims(net, net_params, dummy_input, legacy_pad_ops)

        # Run again with the legacy pad argument removed, compare the
        # dimensions, and adjust the pad argument when necessary.
        ws = workspace.C.Workspace()

        external_input = net.op[0].input[0]
        ws.create_blob(external_input).feed_blob(dummy_input)
        for param in net_params.protos:
            ws.create_blob(param.name) \
                .feed_blob(utils.Caffe2TensorToNumpyArray(param))

        for i in range(len(net.op)):
            op_def = net.op[i]
            if i in legacy_pad_ops:
                arg_map = {}
                for arg in op_def.arg:
                    arg_map[arg.name] = arg
                pads = _GetLegacyPadArgs(op_def, arg_map)
                # remove the legacy pad arg
                for j in range(len(op_def.arg)):
                    arg = op_def.arg[j]
                    if arg.name == 'legacy_pad':
                        del op_def.arg[j]
                        break
                output = op_def.output[0]
                # use a new name to avoid interference with in-place ops
                nonlegacy_output = output + '_nonlegacy'
                op_def.output[0] = nonlegacy_output
                ws._run_operator(op_def.SerializeToString())
                blob_nonlegacy = ws.fetch_blob(nonlegacy_output)
                # reset the output name
                op_def.output[0] = output

                dim1 = dim_map[i]
                dim2 = blob_nonlegacy.shape
                _AdjustDims(op_def, arg_map, pads, dim1, dim2)

            ws._run_operator(op_def.SerializeToString())
    return net


def _GetBlobDimMap(net, net_params, dummy_input):
    dim_map = {}
    ws = workspace.C.Workspace()
    for param in net_params.protos:
        ws.create_blob(param.name) \
            .feed(utils.Caffe2TensorToNumpyArray(param))
    external_input = net.op[0].input[0]
    ws.create_blob(external_input).feed(dummy_input)
    # Record the output dimensions of every op
    for i in range(len(net.op)):
        op_def = net.op[i]
        ws._run_operator(op_def.SerializeToString())
        for output in op_def.output:
            blob = ws.fetch_blob(output)
            dim_map[output] = blob.shape
    return dim_map


def _GetInputDims(caffe_net):
    input_dims = []
    if caffe_net.input_dim:
        input_dims = caffe_net.input_dim
    elif caffe_net.input_shape:
        input_dims = caffe_net.input_shape[0].dim
    elif caffe_net.layer[0].input_param.shape:
        # get the input dimension from the first layer
        input_dims = caffe_net.layer[0].input_param.shape[0].dim
    return input_dims
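
# Example (hedged sketch): for an old-style prototxt that declares
#
#   input: "data"
#   input_dim: 1
#   input_dim: 3
#   input_dim: 224
#   input_dim: 224
#
# _GetInputDims returns [1, 3, 224, 224]; newer prototxts that use input_shape
# or an explicit Input layer with input_param.shape hit the other two branches.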


class TranslatorRegistry(object):
    registry_ = {}

    @classmethod
    def Register(cls, op_name):
        """A decorator for registering layer translators."""

        def Wrapper(func):
            cls.registry_[op_name] = func
            return func

        return Wrapper

    @classmethod
    def TranslateLayer(cls, layer, pretrained_blobs, is_test, **kwargs):
        try:
            caffe_ops, params = cls.registry_[layer.type](
                layer, pretrained_blobs, is_test, **kwargs)
        except KeyError:
            raise KeyError('No translator registered for layer: %s yet.' %
                           str(layer))
        if caffe_ops is None:
            caffe_ops = []
        if type(caffe_ops) is not list:
            caffe_ops = [caffe_ops]
        return caffe_ops, params

    @classmethod
    def TranslateModel(
        cls,
        caffe_net,
        pretrained_net,
        is_test=False,
        net_state=None,
        remove_legacy_pad=False,
        input_dims=None
    ):
        net_state = caffe_pb2.NetState() if net_state is None else net_state
        net = caffe2_pb2.NetDef()
        net.name = caffe_net.name
        net_params = caffe2_pb2.TensorProtos()
        if len(caffe_net.layers) > 0:
            raise ValueError(
                'Something looks wrong: this translation script only '
                'accepts new-style layers that are stored in the '
                'layer field.'
            )
        if not input_dims:
            input_dims = _GetInputDims(caffe_net)
        for layer in caffe_net.layer:
            if not _ShouldInclude(net_state, layer):
                log.info('Current net state does not need layer {}'
                         .format(layer.name))
                continue
            log.info('Translate layer {}'.format(layer.name))
            # Get the pretrained layer, if any
            pretrained_layers = (
                [l for l in pretrained_net.layer
                 if l.name == layer.name] +
                [l for l in pretrained_net.layers
                 if l.name == layer.name]
            )
            if len(pretrained_layers) > 1:
                raise ValueError(
                    'Found more than one pretrained layer with the same name.')
            elif len(pretrained_layers) == 1:
                pretrained_blobs = [
                    utils.CaffeBlobToNumpyArray(blob)
                    for blob in pretrained_layers[0].blobs
                ]
            else:
                # No pretrained layer for the given layer name. We'll just pass
                # no parameter blobs.
                # print 'No pretrained layer for layer', layer.name
                pretrained_blobs = []
            operators, params = cls.TranslateLayer(
                layer, pretrained_blobs, is_test, net=net,
                net_params=net_params, input_dims=input_dims)
            net.op.extend(operators)
            net_params.protos.extend(params)
        if remove_legacy_pad:
            assert input_dims, \
                'Please specify input_dims to remove legacy_pad'
            net = _RemoveLegacyPad(net, net_params, input_dims)
        return net, net_params


def TranslateModel(*args, **kwargs):
    return TranslatorRegistry.TranslateModel(*args, **kwargs)
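
# Example (hedged sketch, not part of the upstream module): a translator for a
# hypothetical layer type can be plugged in with the same decorator that the
# built-in translators below use:
#
#   @TranslatorRegistry.Register("MyCustomLayer")
#   def TranslateMyCustomLayer(layer, pretrained_blobs, is_test, **kwargs):
#       caffe_op = BaseTranslate(layer, "Relu")  # map bottoms/tops to an op
#       return caffe_op, []                      # no parameter blobs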


def ConvertTensorProtosToInitNet(net_params, input_name):
    """Takes the net_params returned from TranslateModel, and wraps them as an
    init net that contains GivenTensorFill ops.

    This is a very simple feature that only works with float tensors, and is
    only intended to be used in an environment where you want a single
    initialization file - for more complex cases, use a db to store the
    parameters.
    """
    init_net = caffe2_pb2.NetDef()
    for tensor in net_params.protos:
        if len(tensor.float_data) == 0:
            raise RuntimeError(
                "Only float tensors are supported in this util.")
        op = core.CreateOperator(
            "GivenTensorFill", [], [tensor.name],
            arg=[
                utils.MakeArgument("shape", list(tensor.dims)),
                utils.MakeArgument("values", tensor.float_data)])
        init_net.op.extend([op])
    init_net.op.extend([core.CreateOperator(
        "ConstantFill", [], [input_name], shape=[1])])
    return init_net
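
# Example (hedged sketch): the init net produced above can be run once to
# materialize the translated parameters in the default workspace ("data" here
# is a placeholder input name):
#
#   init_net = ConvertTensorProtosToInitNet(pretrained_params, "data")
#   workspace.RunNetOnce(init_net)
#   w = workspace.FetchBlob(pretrained_params.protos[0].name)  # numpy array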


def BaseTranslate(layer, caffe2_type):
    """A simple translate interface that maps the layer input and output."""
    caffe2_op = caffe2_pb2.OperatorDef()
    caffe2_op.type = caffe2_type
    caffe2_op.input.extend(layer.bottom)
    caffe2_op.output.extend(layer.top)
    return caffe2_op


def AddArgument(op, key, value):
    """Makes an argument based on the value type."""
    op.arg.extend([utils.MakeArgument(key, value)])

################################################################################
# Common translators for layers.
################################################################################


@TranslatorRegistry.Register("Input")
def TranslateInput(layer, pretrained_blobs, is_test, **kwargs):
    return [], []


@TranslatorRegistry.Register("VideoData")
def TranslateVideoData(layer, pretrained_blobs, is_test, **kwargs):
    return [], []


@TranslatorRegistry.Register("Data")
def TranslateData(layer, pretrained_blobs, is_test, **kwargs):
    return [], []


# A function used in convolution, pooling and deconvolution to deal with
# conv- and pool-specific parameters.
def _TranslateStridePadKernelHelper(param, caffe_op):
    try:
        if (len(param.stride) > 1 or len(param.kernel_size) > 1 or
                len(param.pad) > 1):
            raise NotImplementedError(
                "Translator currently does not support non-conventional "
                "pad/kernel/stride settings."
            )
        stride = param.stride[0] if len(param.stride) else 1
        pad = param.pad[0] if len(param.pad) else 0
        kernel = param.kernel_size[0] if len(param.kernel_size) else 0
    except TypeError:
        # This catches the case of a PoolingParameter, in which case we have
        # non-repeated pad, stride and kernel fields.
        stride = param.stride
        pad = param.pad
        kernel = param.kernel_size
    # Get stride
    if param.HasField("stride_h") or param.HasField("stride_w"):
        AddArgument(caffe_op, "stride_h", param.stride_h)
        AddArgument(caffe_op, "stride_w", param.stride_w)
    else:
        AddArgument(caffe_op, "stride", stride)
    # Get pad
    if param.HasField("pad_h") or param.HasField("pad_w"):
        if param.pad_h == param.pad_w:
            AddArgument(caffe_op, "pad", param.pad_h)
        else:
            AddArgument(caffe_op, "pad_t", param.pad_h)
            AddArgument(caffe_op, "pad_b", param.pad_h)
            AddArgument(caffe_op, "pad_l", param.pad_w)
            AddArgument(caffe_op, "pad_r", param.pad_w)
    else:
        AddArgument(caffe_op, "pad", pad)
    # Get kernel
    if param.HasField("kernel_h") or param.HasField("kernel_w"):
        AddArgument(caffe_op, "kernel_h", param.kernel_h)
        AddArgument(caffe_op, "kernel_w", param.kernel_w)
    else:
        AddArgument(caffe_op, "kernel", kernel)
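
# Example (hedged sketch): for a ConvolutionParameter with kernel_size: 3,
# stride: 2 and pad: 1 (and no *_h / *_w overrides), the helper above attaches
# the arguments stride=2, pad=1 and kernel=3 to the Caffe2 operator.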


@TranslatorRegistry.Register("Convolution3D")
def TranslateConvNd(layer, pretrained_blobs, is_test, **kwargs):
    param = layer.convolution3d_param
    caffe_op = BaseTranslate(layer, "Conv")
    output = caffe_op.output[0]
    caffe_op.input.append(output + '_w')

    AddArgument(
        caffe_op,
        "kernels",
        [param.kernel_depth, param.kernel_size, param.kernel_size])
    AddArgument(
        caffe_op,
        "strides",
        [param.temporal_stride, param.stride, param.stride])
    temporal_pad = 0
    spatial_pad = 0
    if hasattr(param, 'temporal_pad'):
        temporal_pad = param.temporal_pad
    if hasattr(param, 'pad'):
        spatial_pad = param.pad
    AddArgument(caffe_op, "pads", [temporal_pad, spatial_pad, spatial_pad] * 2)

    # weight
    params = [
        utils.NumpyArrayToCaffe2Tensor(pretrained_blobs[0], output + '_w')]
    # bias
    if len(pretrained_blobs) == 2:
        caffe_op.input.append(output + '_b')
        params.append(
            utils.NumpyArrayToCaffe2Tensor(
                pretrained_blobs[1].flatten(), output + '_b'))
    return caffe_op, params


@TranslatorRegistry.Register("Convolution")
def TranslateConv(layer, pretrained_blobs, is_test, **kwargs):
    param = layer.convolution_param
    caffe_op = BaseTranslate(layer, "Conv")
    output = caffe_op.output[0]
    caffe_op.input.append(output + '_w')
    _TranslateStridePadKernelHelper(param, caffe_op)
    # weight
    params = [
        utils.NumpyArrayToCaffe2Tensor(pretrained_blobs[0], output + '_w')]
    # bias
    if len(pretrained_blobs) == 2:
        caffe_op.input.append(output + '_b')
        params.append(
            utils.NumpyArrayToCaffe2Tensor(
                pretrained_blobs[1].flatten(), output + '_b'))
    # Group convolution option
    if param.group != 1:
        AddArgument(caffe_op, "group", param.group)
    # Get dilation - not tested. If you have a model and this checks out,
    # please provide a test and uncomment this.
    if len(param.dilation) > 0:
        if len(param.dilation) == 1:
            AddArgument(caffe_op, "dilation", param.dilation[0])
        elif len(param.dilation) == 2:
            AddArgument(caffe_op, "dilation_h", param.dilation[0])
            AddArgument(caffe_op, "dilation_w", param.dilation[1])
    return caffe_op, params


@TranslatorRegistry.Register("Deconvolution")
def TranslateDeconv(layer, pretrained_blobs, is_test, **kwargs):
    param = layer.convolution_param
    if param.group > 1:
        raise NotImplementedError(
            "Translator currently does not support group deconvolution."
        )
    caffe_op = BaseTranslate(layer, "ConvTranspose")
    output = caffe_op.output[0]
    _TranslateStridePadKernelHelper(param, caffe_op)
    caffe_op.input.extend([output + '_w'])
    AddArgument(caffe_op, "order", "NCHW")
    weight = utils.NumpyArrayToCaffe2Tensor(pretrained_blobs[0], output + '_w')
    if param.bias_term:
        bias = utils.NumpyArrayToCaffe2Tensor(
            pretrained_blobs[1].flatten(), output + '_b'
        )
        caffe_op.input.extend([output + '_b'])
        return caffe_op, [weight, bias]
    else:
        return caffe_op, [weight]


@TranslatorRegistry.Register("Crop")
def TranslateCrop(layer, pretrained_blobs, is_test, **kwargs):
    net, net_params, input_dims = kwargs['net'], kwargs['net_params'], \
        kwargs['input_dims']
    n, c, h, w = input_dims
    dummy_input = np.random.randn(n, c, h, w).astype(np.float32)
    dim_map = _GetBlobDimMap(net, net_params, dummy_input)
    param = layer.crop_param
    axis, offsets = param.axis, param.offset
    caffe_op = BaseTranslate(layer, "Slice")
    input_1 = caffe_op.input[1]
    input_1_dim = dim_map[input_1]
    starts, ends = [], []
    dims = len(dim_map[input_1])
    assert len(offsets) == 1, ('Caffe translator for Crop currently supports '
                               'only a single offset value')
    for _ in range(axis):
        starts.append(0)
        ends.append(-1)
    end_offset = [int(offsets[0] + input_1_dim[i]) for i in range(axis, dims)]
    ends.extend(end_offset)
    starts.extend([offsets[0]] * len(end_offset))
    op = caffe2_pb2.OperatorDef()
    op.input.extend([caffe_op.input[0]])
    op.output.extend(caffe_op.output)
    op.arg.extend(caffe_op.arg)
    op.type = caffe_op.type
    AddArgument(op, "starts", starts)
    AddArgument(op, "ends", ends)
    return op, []


@TranslatorRegistry.Register("ReLU")
def TranslateRelu(layer, pretrained_blobs, is_test, **kwargs):
    return BaseTranslate(layer, "Relu"), []


@TranslatorRegistry.Register("Pooling")
def TranslatePool(layer, pretrained_blobs, is_test, **kwargs):
    param = layer.pooling_param
    if param.pool == caffe_pb2.PoolingParameter.MAX:
        caffe_op = BaseTranslate(layer, "MaxPool")
    elif param.pool == caffe_pb2.PoolingParameter.AVE:
        caffe_op = BaseTranslate(layer, "AveragePool")
    else:
        raise NotImplementedError(
            "Unsupported pooling type (only MAX and AVE are translated).")
    _TranslateStridePadKernelHelper(param, caffe_op)
    AddArgument(caffe_op, "order", "NCHW")
    try:
        # In the Facebook port of Caffe, a torch_pooling field was added to
        # map the pooling computation of Torch. Essentially, it uses
        #   floor((height + 2 * padding - kernel) / stride) + 1
        # instead of
        #   ceil((height + 2 * padding - kernel) / stride) + 1
        # which is Caffe's version.
        # Torch pooling is actually the same as Caffe2 pooling, so we don't
        # need to do anything.
        is_torch_pooling = param.torch_pooling
    except AttributeError:
        is_torch_pooling = False
    if not is_torch_pooling:
        AddArgument(caffe_op, "legacy_pad",
                    caffe2_legacy_pb2.CAFFE_LEGACY_POOLING)
    if param.global_pooling:
        AddArgument(caffe_op, "global_pooling", 1)
    return caffe_op, []
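
# Worked example for the legacy_pad note above (illustrative numbers): with
# height = 6, kernel = 3, stride = 2 and pad = 0, Caffe produces
# ceil((6 + 0 - 3) / 2) + 1 = 3 output rows, while Torch/Caffe2-style pooling
# produces floor((6 + 0 - 3) / 2) + 1 = 2; this is why the CAFFE_LEGACY_POOLING
# flag is attached when torch_pooling is not set.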


@TranslatorRegistry.Register("Pooling3D")
def TranslatePool3D(layer, pretrained_blobs, is_test, **kwargs):
    param = layer.pooling3d_param
    if param.pool == caffe_pb2.Pooling3DParameter.MAX:
        caffe_op = BaseTranslate(layer, "MaxPool")
    elif param.pool == caffe_pb2.Pooling3DParameter.AVE:
        caffe_op = BaseTranslate(layer, "AveragePool")
    else:
        raise NotImplementedError(
            "Unsupported 3D pooling type (only MAX and AVE are translated).")
    AddArgument(caffe_op, "order", "NCHW")
    AddArgument(
        caffe_op,
        "kernels",
        [param.kernel_depth, param.kernel_size, param.kernel_size])

    AddArgument(
        caffe_op,
        "strides",
        [param.temporal_stride, param.stride, param.stride])
    temporal_pad = 0
    spatial_pad = 0
    if hasattr(param, 'temporal_pad'):
        temporal_pad = param.temporal_pad
    if hasattr(param, 'pad'):
        spatial_pad = param.pad
    AddArgument(caffe_op, "pads", [temporal_pad, spatial_pad, spatial_pad] * 2)
    return caffe_op, []


@TranslatorRegistry.Register("LRN")
def TranslateLRN(layer, pretrained_blobs, is_test, **kwargs):
    caffe_op = BaseTranslate(layer, "LRN")
    caffe_op.output.extend(['_' + caffe_op.output[0] + '_scale'])
    param = layer.lrn_param
    if param.norm_region != caffe_pb2.LRNParameter.ACROSS_CHANNELS:
        raise ValueError(
            "Does not support norm region other than across channels.")
    AddArgument(caffe_op, "size", int(param.local_size))
    AddArgument(caffe_op, "alpha", float(param.alpha))
    AddArgument(caffe_op, "beta", float(param.beta))
    AddArgument(caffe_op, "bias", float(param.k))
    AddArgument(caffe_op, "order", "NCHW")
    return caffe_op, []


@TranslatorRegistry.Register("InnerProduct")
def TranslateInnerProduct(layer, pretrained_blobs, is_test, **kwargs):
    param = layer.inner_product_param
    try:
        if param.axis != 1 or param.transpose:
            raise ValueError(
                "We don't have a test case for non-default axis and transpose "
                "cases yet, so they are disabled for now. If you have a model "
                "with this, please do send us your model so we can update "
                "this support, and you are more than welcome to send a PR.")
    except AttributeError:
        # We might be using an historic Caffe protobuf that does not have axis
        # and transpose arguments, so we will silently pass.
        pass
    caffe_op = BaseTranslate(layer, "FC")
    output = caffe_op.output[0]
    caffe_op.input.extend([output + '_w', output + '_b'])
    # To handle the old-style 4-dimensional blob (1, 1, dim_output, dim_input)
    # case, we always explicitly reshape the pretrained blob.
    if pretrained_blobs[0].ndim not in [2, 4]:
        raise ValueError("Unexpected weight ndim.")
    if (pretrained_blobs[0].ndim == 4 and
            list(pretrained_blobs[0].shape[:2]) != [1, 1]):
        raise ValueError(
            "If the pretrained blob has 4 dims (old-style Caffe), the first "
            "two should be of value 1, but I got " +
            str(pretrained_blobs[0].shape))
    weight = utils.NumpyArrayToCaffe2Tensor(
        pretrained_blobs[0].reshape(-1, pretrained_blobs[0].shape[-1]),
        output + '_w'
    )
    bias = utils.NumpyArrayToCaffe2Tensor(
        pretrained_blobs[1].flatten(), output + '_b'
    )
    return caffe_op, [weight, bias]


@TranslatorRegistry.Register("Dropout")
def TranslateDropout(layer, pretrained_blobs, is_test, **kwargs):
    caffe_op = BaseTranslate(layer, "Dropout")
    caffe_op.output.extend(['_' + caffe_op.output[0] + '_mask'])
    param = layer.dropout_param
    AddArgument(caffe_op, "ratio", param.dropout_ratio)
    if (is_test):
        AddArgument(caffe_op, "is_test", 1)
    return caffe_op, []


@TranslatorRegistry.Register("Softmax")
def TranslateSoftmax(layer, pretrained_blobs, is_test, **kwargs):
    caffe_op = BaseTranslate(layer, "Softmax")
    return caffe_op, []


@TranslatorRegistry.Register("SoftmaxWithLoss")
def TranslateSoftmaxWithLoss(layer, pretrained_blobs, is_test, **kwargs):
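    # Caffe's fused SoftmaxWithLoss layer has no single Caffe2 counterpart, so
    # it is expanded into Softmax -> LabelCrossEntropy -> AveragedLoss, chained
    # through autogenerated intermediate blobs.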
    softmax_op = core.CreateOperator(
        "Softmax", [layer.bottom[0]],
        layer.bottom[0] + "_translator_autogen_softmax")
    xent_op = core.CreateOperator(
        "LabelCrossEntropy",
        [softmax_op.output[0], layer.bottom[1]],
        layer.bottom[0] + "_translator_autogen_xent")
    loss_op = core.CreateOperator(
        "AveragedLoss",
        xent_op.output[0],
        layer.top[0])
    return [softmax_op, xent_op, loss_op], []


@TranslatorRegistry.Register("Accuracy")
def TranslateAccuracy(layer, pretrained_blobs, is_test, **kwargs):
    caffe_op = BaseTranslate(layer, "Accuracy")
    if layer.accuracy_param.top_k != 1:
        AddArgument(caffe_op, "top_k", layer.accuracy_param.top_k)
    return caffe_op, []


@TranslatorRegistry.Register("Concat")
def TranslateConcat(layer, pretrained_blobs, is_test, **kwargs):
    caffe_op = BaseTranslate(layer, "Concat")
    caffe_op.output.extend(['_' + caffe_op.output[0] + '_dims'])
    AddArgument(caffe_op, "order", "NCHW")
    return caffe_op, []


@TranslatorRegistry.Register("TanH")
def TranslateTanH(layer, pretrained_blobs, is_test, **kwargs):
    caffe_op = BaseTranslate(layer, "Tanh")
    return caffe_op, []


@TranslatorRegistry.Register("InstanceNorm")
def TranslateInstanceNorm(layer, pretrained_blobs, is_test, **kwargs):
    caffe_op = BaseTranslate(layer, "InstanceNorm")
    output = caffe_op.output[0]
    weight = utils.NumpyArrayToCaffe2Tensor(
        pretrained_blobs[0].flatten(), output + '_w')
    bias = utils.NumpyArrayToCaffe2Tensor(
        pretrained_blobs[1].flatten(), output + '_b')
    caffe_op.input.extend([output + '_w', output + '_b'])
    AddArgument(caffe_op, "order", "NCHW")
    return caffe_op, [weight, bias]


@TranslatorRegistry.Register("BatchNorm")
def TranslateBatchNorm(layer, pretrained_blobs, is_test, **kwargs):
    caffe_op = BaseTranslate(layer, "SpatialBN")
    output = caffe_op.output[0]
    param = layer.batch_norm_param
    AddArgument(caffe_op, "is_test", is_test)
    AddArgument(caffe_op, "epsilon", param.eps)
    AddArgument(caffe_op, "order", "NCHW")

    caffe_op.input.extend(
        [output + "_scale",
         output + "_bias",
         output + "_mean",
         output + "_var"])
    if not is_test:
        caffe_op.output.extend(
            [output + "_mean",
             output + "_var",
             output + "_saved_mean",
             output + "_saved_var"])

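    # Caffe's BatchNorm layer stores three blobs: the running mean and the
    # running variance, each pre-multiplied by a moving-average scale factor,
    # plus that scale factor itself as a one-element blob. Dividing by
    # pretrained_blobs[2][0] below recovers the actual mean and variance.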
    n_channels = pretrained_blobs[0].shape[0]
    if pretrained_blobs[2][0] != 0:
        mean = utils.NumpyArrayToCaffe2Tensor(
            (1. / pretrained_blobs[2][0]) * pretrained_blobs[0],
            output + '_mean')
        var = utils.NumpyArrayToCaffe2Tensor(
            (1. / pretrained_blobs[2][0]) * pretrained_blobs[1],
            output + '_var')
    else:
        raise RuntimeError("scalar is zero.")
    if len(pretrained_blobs) > 3:
        # IntelCaffe and NVCaffe use a fused BN+Scale layer: three blobs for
        # BN and two blobs for Scale, so the total number of blobs becomes
        # five (including scale and bias).
        scale = utils.NumpyArrayToCaffe2Tensor(
            pretrained_blobs[3].flatten(),
            output + '_scale')
        bias = utils.NumpyArrayToCaffe2Tensor(
            pretrained_blobs[4].flatten(),
            output + '_bias')
    else:
        pretrained_blobs[2][0] = 1
        pretrained_blobs[2] = np.tile(pretrained_blobs[2], (n_channels, ))
        scale = utils.NumpyArrayToCaffe2Tensor(
            pretrained_blobs[2],
            output + '_scale')
        bias = utils.NumpyArrayToCaffe2Tensor(
            np.zeros_like(pretrained_blobs[2]),
            output + '_bias')

    return caffe_op, [scale, bias, mean, var]


@TranslatorRegistry.Register("Eltwise")
def TranslateElementWise(layer, pretrained_blobs, is_test, **kwargs):
    param = layer.eltwise_param
    # TODO(jiayq): if we have a protobuf that uses this, lift this constraint
    # and verify that we can correctly translate.
    if len(param.coeff) or param.operation != 1:
        raise RuntimeError("This eltwise layer is not yet supported.")
    caffe_op = BaseTranslate(layer, "Sum")
    return caffe_op, []


@TranslatorRegistry.Register("Scale")
def TranslateScale(layer, pretrained_blobs, is_test, **kwargs):
    mul_op = BaseTranslate(layer, "Mul")
    scale_param = layer.scale_param
    AddArgument(mul_op, "axis", scale_param.axis)
    AddArgument(mul_op, "broadcast", True)
    if len(mul_op.input) == 1:
        # the scale parameter is in the pretrained blobs
        if scale_param.num_axes != 1:
            raise RuntimeError("This path has not been verified yet.")

        output = mul_op.output[0]
        mul_op_param = output + 'scale_w'
        mul_op.input.append(mul_op_param)
        weights = []
        weights.append(utils.NumpyArrayToCaffe2Tensor(
            pretrained_blobs[0].flatten(), mul_op_param))

        add_op = None
        if len(pretrained_blobs) == 1:
            # No bias term in the Scale layer
            pass
        elif len(pretrained_blobs) == 2:
            # Caffe's Scale layer supports a bias term such that it computes
            # (scale_param * X + bias), whereas Caffe2's Mul op doesn't.
            # Include a separate Add op for the bias following the Mul op.
            add_op = copy.deepcopy(mul_op)
            add_op.type = "Add"
            add_op_param = output + 'scale_b'
            internal_blob = output + "_internal"
            del mul_op.output[:]
            mul_op.output.append(internal_blob)
            del add_op.input[:]
            add_op.input.append(internal_blob)
            add_op.input.append(add_op_param)
            weights.append(utils.NumpyArrayToCaffe2Tensor(
                pretrained_blobs[1].flatten(), add_op_param))
        else:
            raise RuntimeError(
                "Unexpected number of pretrained blobs in Scale")

        caffe_ops = [mul_op]
        if add_op:
            caffe_ops.append(add_op)
        assert len(caffe_ops) == len(weights)
        return caffe_ops, weights
    elif len(mul_op.input) == 2:
        # TODO(jiayq): find a protobuf that uses this and verify.
        raise RuntimeError("This path has not been verified yet.")
    else:
        raise RuntimeError("Unexpected number of inputs.")


@TranslatorRegistry.Register("Reshape")
def TranslateReshape(layer, pretrained_blobs, is_test, **kwargs):
    caffe_op = BaseTranslate(layer, "Reshape")
    caffe_op.output.append("_" + caffe_op.input[0] + "_dims")
    reshape_param = layer.reshape_param
    AddArgument(caffe_op, 'shape', reshape_param.shape.dim)
    return caffe_op, []


@TranslatorRegistry.Register("Flatten")
def TranslateFlatten(layer, pretrained_blobs, is_test, **kwargs):
    param = layer.flatten_param
    if param.end_axis != -1:
        raise NotImplementedError("flatten_param.end_axis not supported yet.")

    if param.axis == 0:
        caffe_op = BaseTranslate(layer, "FlattenToVec")
    elif param.axis == 1:
        caffe_op = BaseTranslate(layer, "Flatten")
    else:
        # This could be a Reshape op, but dim size is not known here.
        raise NotImplementedError(
            "Not supported yet for flatten_param.axis {}.".format(param.axis))

    return caffe_op, []


@TranslatorRegistry.Register("Sigmoid")
def TranslateSigmoid(layer, pretrained_blobs, is_test, **kwargs):
    caffe_op = BaseTranslate(layer, "Sigmoid")
    return caffe_op, []


@TranslatorRegistry.Register("ROIPooling")
def TranslateROIPooling(layer, pretrained_blobs, is_test, **kwargs):
    caffe_op = BaseTranslate(layer, "RoIPool")
    AddArgument(caffe_op, "order", "NCHW")

    if is_test:
        AddArgument(caffe_op, "is_test", is_test)
    else:
        # Only used for gradient computation
        caffe_op.output.append(caffe_op.output[0] + '_argmaxes')

    param = layer.roi_pooling_param
    if param.HasField('pooled_h'):
        AddArgument(caffe_op, 'pooled_h', param.pooled_h)
    if param.HasField('pooled_w'):
        AddArgument(caffe_op, 'pooled_w', param.pooled_w)
    if param.HasField('spatial_scale'):
        AddArgument(caffe_op, 'spatial_scale', param.spatial_scale)

    return caffe_op, []


@TranslatorRegistry.Register("PReLU")
def TranslatePRelu(layer, pretrained_blobs, is_test, **kwargs):
    caffe_op = BaseTranslate(layer, "PRelu")
    output = caffe_op.output[0]
    caffe_op.input.extend([output + '_Slope'])
    slope = utils.NumpyArrayToCaffe2Tensor(
        pretrained_blobs[0], output + '_Slope')

    return caffe_op, [slope]


@TranslatorRegistry.Register("Reduction")
def TranslateReduction(layer, pretrained_blobs, is_test, **kwargs):
    param = layer.reduction_param
    if param.operation == caffe_pb2.ReductionParameter.SUM:
        caffe_op = BaseTranslate(layer, "ReduceBackSum")
    elif param.operation == caffe_pb2.ReductionParameter.MEAN:
        caffe_op = BaseTranslate(layer, "ReduceBackMean")
    else:
        raise NotImplementedError("Not yet supported")

    if param.axis > 0:
        # We can't figure out the number of dims to reduce from a positive axis
        # for back reduction, since the shape info is not known here.
        raise NotImplementedError("Not yet supported")
    num_reduce_dim = -param.axis
    AddArgument(caffe_op, "num_reduce_dim", num_reduce_dim)

    return caffe_op, []


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description="Utility to convert pretrained Caffe models to Caffe2 "
                    "models.")
    parser.add_argument("prototext", help="Caffe prototxt.")
    parser.add_argument("caffemodel", help="Caffe trained model.")
    parser.add_argument("--init_net", help="Caffe2 initialization net.",
                        default="init_net.pb")
    parser.add_argument("--predict_net", help="Caffe2 prediction net.",
                        default="predict_net.pb")
    parser.add_argument("--remove_legacy_pad",
                        help="Remove legacy pad "
                             "(only works for nets with one input blob).",
                        action="store_true",
                        default=False)
    parser.add_argument("--input_dims", help="Dimensions of the input blob.",
                        nargs='+', type=int, default=[])
    args = parser.parse_args()

    caffenet = caffe_pb2.NetParameter()
    caffenet_pretrained = caffe_pb2.NetParameter()
    input_proto = args.prototext
    input_caffemodel = args.caffemodel
    output_init_net = args.init_net
    output_predict_net = args.predict_net

    with open(input_proto) as f:
        text_format.Merge(f.read(), caffenet)
    with open(input_caffemodel, 'rb') as f:
        caffenet_pretrained.ParseFromString(f.read())
    net, pretrained_params = TranslateModel(
        caffenet, caffenet_pretrained, is_test=True,
        remove_legacy_pad=args.remove_legacy_pad,
        input_dims=args.input_dims
    )

    # Assume there is one input and one output
    external_input = net.op[0].input[0]
    external_output = net.op[-1].output[0]

    net.external_input.extend([external_input])
    net.external_input.extend(
        [param.name for param in pretrained_params.protos])
    net.external_output.extend([external_output])
    init_net = ConvertTensorProtosToInitNet(pretrained_params, external_input)

    with open(output_predict_net, 'wb') as f:
        f.write(net.SerializeToString())
    with open(output_predict_net + 'txt', 'w') as f:
        f.write(str(net))
    with open(output_init_net, 'wb') as f:
        f.write(init_net.SerializeToString())
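
# Example invocation (hedged; file names are placeholders):
#
#   python -m caffe2.python.caffe_translator deploy.prototxt model.caffemodel \
#       --init_net init_net.pb --predict_net predict_net.pb
#
# This writes the translated prediction net (in binary and text form) and the
# parameter init net into the current directory; they can then be loaded and
# run with the caffe2.python.workspace API.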