Caffe2 - Python API
A deep learning, cross platform ML framework
caffe_translator.py
# Copyright (c) 2016-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

## @package caffe_translator
# Module caffe2.python.caffe_translator
#!/usr/bin/env python2

import argparse
import copy
import logging
import re
import numpy as np  # noqa

from caffe2.proto import caffe2_pb2, caffe2_legacy_pb2
from caffe.proto import caffe_pb2
from caffe2.python import core, utils, workspace
from google.protobuf import text_format

logging.basicConfig()
log = logging.getLogger("caffe_translator")
log.setLevel(logging.INFO)

def _StateMeetsRule(state, rule):
    """A function that reproduces Caffe's StateMeetsRule functionality."""
    if rule.HasField('phase') and rule.phase != state.phase:
        return False
    if rule.HasField('min_level') and state.level < rule.min_level:
        return False
    if rule.HasField('max_level') and state.level > rule.max_level:
        return False
    curr_stages = set(list(state.stage))
    # All stages in rule.stage should be present, otherwise it's not a match.
    if len(rule.stage) and any([s not in curr_stages for s in rule.stage]):
        return False
    # None of the stages in rule.not_stage should be present, otherwise it's
    # not a match.
    if len(rule.not_stage) and any([s in curr_stages for s in rule.not_stage]):
        return False
    # If no mismatch was found, the rule is met.
    return True


def _ShouldInclude(net_state, layer):
    """A function that reproduces Caffe's inclusion and exclusion rule."""
    ret = (len(layer.include) == 0)
    # Check exclude rules: if any exclusion rule is met, we should not include.
    ret &= not any([_StateMeetsRule(net_state, rule) for rule in layer.exclude])
    if len(layer.include):
        # Check include rules: if any inclusion rule is met, we should include.
        ret |= any([_StateMeetsRule(net_state, rule) for rule in layer.include])
    return ret

def _GetLegacyDims(net, net_params, dummy_input, legacy_pad_ops):
    dim_map = {}
    ws = workspace.C.Workspace()
    for param in net_params.protos:
        ws.create_blob(param.name) \
          .feed(utils.Caffe2TensorToNumpyArray(param))
    external_input = net.op[0].input[0]
    ws.create_blob(external_input).feed(dummy_input)
    # Get dimensions with legacy pad
    for i in range(len(net.op)):
        op_def = net.op[i]
        ws._run_operator(op_def.SerializeToString())
        if i in legacy_pad_ops:
            output = op_def.output[0]
            blob_legacy = ws.fetch_blob(output)
            dim_map[i] = blob_legacy.shape
    return dim_map


def _GetLegacyPadArgs(op_def, arg_map):
    pads = {}
    keys = ['pad_l', 'pad_t', 'pad_r', 'pad_b']
    is_pad = 'pad' in arg_map
    if is_pad:
        for k in keys:
            pads[k] = arg_map['pad'].i
    else:
        pads = {x: arg_map[x].i for x in keys}
    return pads


def _AdjustDims(op_def, arg_map, pads, dim1, dim2):
    n1, c1, h1, w1 = dim1
    n2, c2, h2, w2 = dim2
    assert(n1 == n2)
    assert(c1 == c2)
    is_pad = 'pad' in arg_map
    if h1 != h2 or w1 != w2:
        if h1 == h2 + 1:
            pads['pad_b'] += 1
        elif h1 != h2:
            raise Exception("Unexpected dimensions for height:", h1, h2)
        if w1 == w2 + 1:
            pads['pad_r'] += 1
        elif w1 != w2:
            raise Exception("Unexpected dimensions for width:", w1, w2)
        if is_pad:
            op_def.arg.remove(arg_map['pad'])
            args = []
            for name in pads.keys():
                arg = caffe2_pb2.Argument()
                arg.name = name
                arg.i = pads[name]
                args.append(arg)
            op_def.arg.extend(args)
        else:
            for name in pads.keys():
                arg_map[name].i = pads[name]

def _RemoveLegacyPad(net, net_params, input_dims):
    legacy_pad_ops = []
    for i in range(len(net.op)):
        op_def = net.op[i]
        if re.match(r'^(Conv|ConvTranspose|MaxPool|AveragePool)(\dD)?$',
                    op_def.type):
            for arg in op_def.arg:
                if arg.name == 'legacy_pad':
                    legacy_pad_ops.append(i)
                    break
    if legacy_pad_ops:
        n, c, h, w = input_dims
        dummy_input = np.random.randn(n, c, h, w).astype(np.float32)
        dim_map = _GetLegacyDims(net, net_params, dummy_input, legacy_pad_ops)

        # Running with the legacy pad argument removed,
        # compare the dimensions and adjust the pad argument when necessary.
        ws = workspace.C.Workspace()

        external_input = net.op[0].input[0]
        ws.create_blob(external_input).feed_blob(dummy_input)
        for param in net_params.protos:
            ws.create_blob(param.name) \
              .feed_blob(utils.Caffe2TensorToNumpyArray(param))

        for i in range(len(net.op)):
            op_def = net.op[i]
            if i in legacy_pad_ops:
                arg_map = {}
                for arg in op_def.arg:
                    arg_map[arg.name] = arg
                pads = _GetLegacyPadArgs(op_def, arg_map)
                # remove legacy pad arg
                for j in range(len(op_def.arg)):
                    arg = op_def.arg[j]
                    if arg.name == 'legacy_pad':
                        del op_def.arg[j]
                        break
                output = op_def.output[0]
                # use a new name to avoid the interference with inplace
                nonlegacy_output = output + '_nonlegacy'
                op_def.output[0] = nonlegacy_output
                ws._run_operator(op_def.SerializeToString())
                blob_nonlegacy = ws.fetch_blob(nonlegacy_output)
                # reset output name
                op_def.output[0] = output

                dim1 = dim_map[i]
                dim2 = blob_nonlegacy.shape
                _AdjustDims(op_def, arg_map, pads, dim1, dim2)

            ws._run_operator(op_def.SerializeToString())
    return net

def _GetBlobDimMap(net, net_params, dummy_input):
    dim_map = {}
    ws = workspace.C.Workspace()
    for param in net_params.protos:
        ws.create_blob(param.name) \
          .feed(utils.Caffe2TensorToNumpyArray(param))
    external_input = net.op[0].input[0]
    ws.create_blob(external_input).feed(dummy_input)
    # Run the net and record the shape of every output blob.
    for i in range(len(net.op)):
        op_def = net.op[i]
        ws._run_operator(op_def.SerializeToString())
        for output in op_def.output:
            blob = ws.fetch_blob(output)
            dim_map[output] = blob.shape
    return dim_map

def _GetInputDims(caffe_net):
    input_dims = []
    if caffe_net.input_dim:
        input_dims = caffe_net.input_dim
    elif caffe_net.input_shape:
        input_dims = caffe_net.input_shape[0].dim
    elif caffe_net.layer[0].input_param.shape:
        # getting input dimension from first layer
        input_dims = caffe_net.layer[0].input_param.shape[0].dim
    return input_dims

class TranslatorRegistry(object):
    registry_ = {}

    @classmethod
    def Register(cls, op_name):
        """A decorator for registering a layer translator function."""

        def Wrapper(func):
            cls.registry_[op_name] = func
            return func

        return Wrapper

    @classmethod
    def TranslateLayer(cls, layer, pretrained_blobs, is_test, **kwargs):
        try:
            caffe_ops, params = cls.registry_[layer.type](
                layer, pretrained_blobs, is_test, **kwargs)
        except KeyError:
            raise KeyError('No translator registered for layer: %s yet.' %
                           str(layer))
        if caffe_ops is None:
            caffe_ops = []
        if type(caffe_ops) is not list:
            caffe_ops = [caffe_ops]
        return caffe_ops, params

    @classmethod
    def TranslateModel(
        cls,
        caffe_net,
        pretrained_net,
        is_test=False,
        net_state=None,
        remove_legacy_pad=False,
        input_dims=None
    ):
        net_state = caffe_pb2.NetState() if net_state is None else net_state
        net = caffe2_pb2.NetDef()
        net.name = caffe_net.name
        net_params = caffe2_pb2.TensorProtos()
        if len(caffe_net.layers) > 0:
            raise ValueError(
                'I think something is wrong. This translation script '
                'only accepts new style layers that are stored in the '
                'layer field.'
            )
        if not input_dims:
            input_dims = _GetInputDims(caffe_net)
        for layer in caffe_net.layer:
            if not _ShouldInclude(net_state, layer):
                log.info('Current net state does not need layer {}'
                         .format(layer.name))
                continue
            log.info('Translate layer {}'.format(layer.name))
            # Get the pretrained layer with the same name, if any.
            pretrained_layers = (
                [l for l in pretrained_net.layer
                 if l.name == layer.name] +
                [l for l in pretrained_net.layers
                 if l.name == layer.name]
            )
            if len(pretrained_layers) > 1:
                raise ValueError(
                    'huh? more than one pretrained layer of one name?')
            elif len(pretrained_layers) == 1:
                pretrained_blobs = [
                    utils.CaffeBlobToNumpyArray(blob)
                    for blob in pretrained_layers[0].blobs
                ]
            else:
                # No pretrained layer for the given layer name. We'll just
                # pass no parameter blobs.
                # print 'No pretrained layer for layer', layer.name
                pretrained_blobs = []
            operators, params = cls.TranslateLayer(
                layer, pretrained_blobs, is_test, net=net,
                net_params=net_params, input_dims=input_dims)
            net.op.extend(operators)
            net_params.protos.extend(params)
        if remove_legacy_pad:
            assert input_dims, \
                'Please specify input_dims to remove legacy_pad'
            net = _RemoveLegacyPad(net, net_params, input_dims)
        return net, net_params

def TranslateModel(*args, **kwargs):
    return TranslatorRegistry.TranslateModel(*args, **kwargs)

def ConvertTensorProtosToInitNet(net_params, input_name):
    """Takes the net_params returned from TranslateModel, and wraps it as an
    init net that contains GivenTensorFill operators.

    This is a very simple feature that only works with float tensors, and is
    only intended to be used in an environment where you want a single
    initialization file - for more complex cases, use a db to store the
    parameters.
    """
    init_net = caffe2_pb2.NetDef()
    for tensor in net_params.protos:
        if len(tensor.float_data) == 0:
            raise RuntimeError(
                "Only float tensors are supported in this util.")
        op = core.CreateOperator(
            "GivenTensorFill", [], [tensor.name],
            arg=[
                utils.MakeArgument("shape", list(tensor.dims)),
                utils.MakeArgument("values", tensor.float_data)])
        init_net.op.extend([op])
    init_net.op.extend(
        [core.CreateOperator("ConstantFill", [], [input_name], shape=[1])])
    return init_net

def BaseTranslate(layer, caffe2_type):
    """A simple translate interface that maps the layer input and output."""
    caffe2_op = caffe2_pb2.OperatorDef()
    caffe2_op.type = caffe2_type
    caffe2_op.input.extend(layer.bottom)
    caffe2_op.output.extend(layer.top)
    return caffe2_op


def AddArgument(op, key, value):
    """Makes an argument based on the value type."""
    op.arg.extend([utils.MakeArgument(key, value)])

################################################################################
# Common translators for layers.
################################################################################


@TranslatorRegistry.Register("Input")
def TranslateInput(layer, pretrained_blobs, is_test, **kwargs):
    return [], []


@TranslatorRegistry.Register("VideoData")
def TranslateVideoData(layer, pretrained_blobs, is_test, **kwargs):
    return [], []


@TranslatorRegistry.Register("Data")
def TranslateData(layer, pretrained_blobs, is_test, **kwargs):
    return [], []

# A function used in convolution, pooling and deconvolution to deal with
# conv/pool specific parameters.
def _TranslateStridePadKernelHelper(param, caffe_op):
    try:
        if (len(param.stride) > 1 or len(param.kernel_size) > 1 or
                len(param.pad) > 1):
            raise NotImplementedError(
                "Translator currently does not support non-conventional "
                "pad/kernel/stride settings."
            )
        stride = param.stride[0] if len(param.stride) else 1
        pad = param.pad[0] if len(param.pad) else 0
        kernel = param.kernel_size[0] if len(param.kernel_size) else 0
    except TypeError:
        # This catches the case of a PoolingParameter, in which pad, stride
        # and kernel_size are scalar fields rather than repeated ones.
        stride = param.stride
        pad = param.pad
        kernel = param.kernel_size
    # Get stride
    if param.HasField("stride_h") or param.HasField("stride_w"):
        AddArgument(caffe_op, "stride_h", param.stride_h)
        AddArgument(caffe_op, "stride_w", param.stride_w)
    else:
        AddArgument(caffe_op, "stride", stride)
    # Get pad
    if param.HasField("pad_h") or param.HasField("pad_w"):
        if param.pad_h == param.pad_w:
            AddArgument(caffe_op, "pad", param.pad_h)
        else:
            AddArgument(caffe_op, "pad_t", param.pad_h)
            AddArgument(caffe_op, "pad_b", param.pad_h)
            AddArgument(caffe_op, "pad_l", param.pad_w)
            AddArgument(caffe_op, "pad_r", param.pad_w)
    else:
        AddArgument(caffe_op, "pad", pad)
    # Get kernel
    if param.HasField("kernel_h") or param.HasField("kernel_w"):
        AddArgument(caffe_op, "kernel_h", param.kernel_h)
        AddArgument(caffe_op, "kernel_w", param.kernel_w)
    else:
        AddArgument(caffe_op, "kernel", kernel)

403 @TranslatorRegistry.Register("Convolution3D")
404 def TranslateConvNd(layer, pretrained_blobs, is_test, **kwargs):
405  param = layer.convolution3d_param
406  caffe_op = BaseTranslate(layer, "Conv")
407  output = caffe_op.output[0]
408  caffe_op.input.append(output + '_w')
409 
410  AddArgument(
411  caffe_op,
412  "kernels",
413  [param.kernel_depth, param.kernel_size, param.kernel_size])
414  AddArgument(
415  caffe_op,
416  "strides",
417  [param.temporal_stride, param.stride, param.stride])
418  temporal_pad = 0
419  spatial_pad = 0
420  if hasattr(param, 'temporal_pad'):
421  temporal_pad = param.temporal_pad
422  if hasattr(param, 'pad'):
423  spatial_pad = param.pad
424  AddArgument(caffe_op, "pads", [temporal_pad, spatial_pad, spatial_pad] * 2)
425 
426  # weight
427  params = [
428  utils.NumpyArrayToCaffe2Tensor(pretrained_blobs[0], output + '_w')]
429  # bias
430  if len(pretrained_blobs) == 2:
431  caffe_op.input.append(output + '_b')
432  params.append(
433  utils.NumpyArrayToCaffe2Tensor(
434  pretrained_blobs[1].flatten(), output + '_b'))
435  return caffe_op, params
436 
437 
438 @TranslatorRegistry.Register("Convolution")
439 def TranslateConv(layer, pretrained_blobs, is_test, **kwargs):
440  param = layer.convolution_param
441  caffe_op = BaseTranslate(layer, "Conv")
442  output = caffe_op.output[0]
443  caffe_op.input.append(output + '_w')
444  _TranslateStridePadKernelHelper(param, caffe_op)
445  # weight
446  params = [
447  utils.NumpyArrayToCaffe2Tensor(pretrained_blobs[0], output + '_w')]
448  # bias
449  if len(pretrained_blobs) == 2:
450  caffe_op.input.append(output + '_b')
451  params.append(
452  utils.NumpyArrayToCaffe2Tensor(
453  pretrained_blobs[1].flatten(), output + '_b'))
454  # Group convolution option
455  if param.group != 1:
456  AddArgument(caffe_op, "group", param.group)
457  # Get dilation - not tested. If you have a model and this checks out,
458  # please provide a test and uncomment this.
459  if len(param.dilation) > 0:
460  if len(param.dilation) == 1:
461  AddArgument(caffe_op, "dilation", param.dilation[0])
462  elif len(param.dilation) == 2:
463  AddArgument(caffe_op, "dilation_h", param.dilation[0])
464  AddArgument(caffe_op, "dilation_w", param.dilation[1])
465  return caffe_op, params
466 
467 
468 @TranslatorRegistry.Register("Deconvolution")
469 def TranslateDeconv(layer, pretrained_blobs, is_test, **kwargs):
470  param = layer.convolution_param
471  if param.group > 1:
472  raise NotImplementedError(
473  "Translator currently does not support group deconvolution."
474  )
475  caffe_op = BaseTranslate(layer, "ConvTranspose")
476  output = caffe_op.output[0]
477  _TranslateStridePadKernelHelper(param, caffe_op)
478  caffe_op.input.extend([output + '_w'])
479  AddArgument(caffe_op, "order", "NCHW")
480  weight = utils.NumpyArrayToCaffe2Tensor(pretrained_blobs[0], output + '_w')
481  if param.bias_term:
482  bias = utils.NumpyArrayToCaffe2Tensor(
483  pretrained_blobs[1].flatten(), output + '_b'
484  )
485  caffe_op.input.extend([output + '_b'])
486  return caffe_op, [weight, bias]
487  else:
488  return caffe_op, [weight]
489 
490 
491 @TranslatorRegistry.Register("Crop")
492 def TranslateCrop(layer, pretrained_blobs, is_test, **kwargs):
493  net, net_params, input_dims = kwargs['net'], kwargs['net_params'], kwargs['input_dims']
494  n, c, h, w = input_dims
495  dummy_input = np.random.randn(n, c, h, w).astype(np.float32)
496  dim_map = _GetBlobDimMap(net, net_params, dummy_input)
497  param = layer.crop_param
498  axis, offsets = param.axis, param.offset
499  caffe_op = BaseTranslate(layer, "Slice")
500  input_1 = caffe_op.input[1]
501  input_1_dim = dim_map[input_1]
502  starts, ends = [], []
503  dims = len(dim_map[input_1])
504  assert len(offsets) == 1, 'Caffe Translator for Crop only works for offset \
505  of 1 for now'
506  for _ in range(axis):
507  starts.append(0)
508  ends.append(-1)
509  end_offset = [int(offsets[0] + input_1_dim[i]) for i in range(axis, dims)]
510  ends.extend(end_offset)
511  starts.extend([offsets[0]] * len(end_offset))
512  op = caffe2_pb2.OperatorDef()
513  op.input.extend([caffe_op.input[0]])
514  op.output.extend(caffe_op.output)
515  op.arg.extend(caffe_op.arg)
516  op.type = caffe_op.type
517  AddArgument(op, "starts", starts)
518  AddArgument(op, "ends", ends)
519  return op, []
520 
@TranslatorRegistry.Register("ReLU")
def TranslateRelu(layer, pretrained_blobs, is_test, **kwargs):
    return BaseTranslate(layer, "Relu"), []


@TranslatorRegistry.Register("Pooling")
def TranslatePool(layer, pretrained_blobs, is_test, **kwargs):
    param = layer.pooling_param
    if param.pool == caffe_pb2.PoolingParameter.MAX:
        caffe_op = BaseTranslate(layer, "MaxPool")
    elif param.pool == caffe_pb2.PoolingParameter.AVE:
        caffe_op = BaseTranslate(layer, "AveragePool")
    _TranslateStridePadKernelHelper(param, caffe_op)
    AddArgument(caffe_op, "order", "NCHW")
    try:
        # In the Facebook port of Caffe, a torch_pooling field was added to
        # map the pooling computation of Torch. Essentially, it uses
        #   floor((height + 2 * padding - kernel) / stride) + 1
        # instead of
        #   ceil((height + 2 * padding - kernel) / stride) + 1
        # which is Caffe's version.
        # Torch pooling is actually the same as Caffe2 pooling, so we don't
        # need to do anything.
        is_torch_pooling = param.torch_pooling
    except AttributeError:
        is_torch_pooling = False
    if not is_torch_pooling:
        AddArgument(caffe_op, "legacy_pad",
                    caffe2_legacy_pb2.CAFFE_LEGACY_POOLING)
    if param.global_pooling:
        AddArgument(caffe_op, "global_pooling", 1)
    return caffe_op, []


@TranslatorRegistry.Register("Pooling3D")
def TranslatePool3D(layer, pretrained_blobs, is_test, **kwargs):
    param = layer.pooling3d_param
    if param.pool == caffe_pb2.Pooling3DParameter.MAX:
        caffe_op = BaseTranslate(layer, "MaxPool")
    elif param.pool == caffe_pb2.Pooling3DParameter.AVE:
        caffe_op = BaseTranslate(layer, "AveragePool")
    AddArgument(caffe_op, "order", "NCHW")
    AddArgument(
        caffe_op,
        "kernels",
        [param.kernel_depth, param.kernel_size, param.kernel_size])
    AddArgument(
        caffe_op,
        "strides",
        [param.temporal_stride, param.stride, param.stride])
    temporal_pad = 0
    spatial_pad = 0
    if hasattr(param, 'temporal_pad'):
        temporal_pad = param.temporal_pad
    if hasattr(param, 'pad'):
        spatial_pad = param.pad
    AddArgument(caffe_op, "pads", [temporal_pad, spatial_pad, spatial_pad] * 2)
    return caffe_op, []


@TranslatorRegistry.Register("LRN")
def TranslateLRN(layer, pretrained_blobs, is_test, **kwargs):
    caffe_op = BaseTranslate(layer, "LRN")
    caffe_op.output.extend(['_' + caffe_op.output[0] + '_scale'])
    param = layer.lrn_param
    if param.norm_region != caffe_pb2.LRNParameter.ACROSS_CHANNELS:
        raise ValueError(
            "Does not support norm region other than across channels.")
    AddArgument(caffe_op, "size", int(param.local_size))
    AddArgument(caffe_op, "alpha", float(param.alpha))
    AddArgument(caffe_op, "beta", float(param.beta))
    AddArgument(caffe_op, "bias", float(param.k))
    AddArgument(caffe_op, "order", "NCHW")
    return caffe_op, []

@TranslatorRegistry.Register("InnerProduct")
def TranslateInnerProduct(layer, pretrained_blobs, is_test, **kwargs):
    param = layer.inner_product_param
    try:
        if param.axis != 1 or param.transpose:
            raise ValueError(
                "We don't have testing case for non-default axis and transpose "
                "cases yet so we are disabling it for now. If you have a model "
                "with this, please do send us your model for us to update this "
                "support, and you are more than welcome to send a PR for this.")
    except AttributeError:
        # We might be using an historic Caffe protobuf that does not have axis
        # and transpose arguments, so we will silently pass.
        pass
    caffe_op = BaseTranslate(layer, "FC")
    output = caffe_op.output[0]
    caffe_op.input.extend([output + '_w', output + '_b'])
    # To provide the old-style 4-dimensional blob (1, 1, dim_output, dim_input)
    # case, we always explicitly reshape the pretrained blob.
    if pretrained_blobs[0].ndim not in [2, 4]:
        raise ValueError("Unexpected weight ndim.")
    if (pretrained_blobs[0].ndim == 4 and
            list(pretrained_blobs[0].shape[:2]) != [1, 1]):
        raise ValueError(
            "If pretrained blob has 4 dims (old-style Caffe), the first two "
            "should be of value 1, but I got " + str(pretrained_blobs[0].shape))
    weight = utils.NumpyArrayToCaffe2Tensor(
        pretrained_blobs[0].reshape(-1, pretrained_blobs[0].shape[-1]),
        output + '_w'
    )
    bias = utils.NumpyArrayToCaffe2Tensor(
        pretrained_blobs[1].flatten(), output + '_b'
    )
    return caffe_op, [weight, bias]


@TranslatorRegistry.Register("Dropout")
def TranslateDropout(layer, pretrained_blobs, is_test, **kwargs):
    caffe_op = BaseTranslate(layer, "Dropout")
    caffe_op.output.extend(['_' + caffe_op.output[0] + '_mask'])
    param = layer.dropout_param
    AddArgument(caffe_op, "ratio", param.dropout_ratio)
    if is_test:
        AddArgument(caffe_op, "is_test", 1)
    return caffe_op, []


@TranslatorRegistry.Register("Softmax")
def TranslateSoftmax(layer, pretrained_blobs, is_test, **kwargs):
    caffe_op = BaseTranslate(layer, "Softmax")
    return caffe_op, []


@TranslatorRegistry.Register("SoftmaxWithLoss")
def TranslateSoftmaxWithLoss(layer, pretrained_blobs, is_test, **kwargs):
    softmax_op = core.CreateOperator(
        "Softmax", [layer.bottom[0]],
        layer.bottom[0] + "_translator_autogen_softmax")
    xent_op = core.CreateOperator(
        "LabelCrossEntropy",
        [softmax_op.output[0], layer.bottom[1]],
        layer.bottom[0] + "_translator_autogen_xent")
    loss_op = core.CreateOperator(
        "AveragedLoss",
        xent_op.output[0],
        layer.top[0])
    return [softmax_op, xent_op, loss_op], []


@TranslatorRegistry.Register("Accuracy")
def TranslateAccuracy(layer, pretrained_blobs, is_test, **kwargs):
    caffe_op = BaseTranslate(layer, "Accuracy")
    if layer.accuracy_param.top_k != 1:
        AddArgument(caffe_op, "top_k", layer.accuracy_param.top_k)
    return caffe_op, []


@TranslatorRegistry.Register("Concat")
def TranslateConcat(layer, pretrained_blobs, is_test, **kwargs):
    caffe_op = BaseTranslate(layer, "Concat")
    caffe_op.output.extend(['_' + caffe_op.output[0] + '_dims'])
    AddArgument(caffe_op, "order", "NCHW")
    return caffe_op, []


@TranslatorRegistry.Register("TanH")
def TranslateTanH(layer, pretrained_blobs, is_test, **kwargs):
    caffe_op = BaseTranslate(layer, "Tanh")
    return caffe_op, []


@TranslatorRegistry.Register("InstanceNorm")
def TranslateInstanceNorm(layer, pretrained_blobs, is_test, **kwargs):
    caffe_op = BaseTranslate(layer, "InstanceNorm")
    output = caffe_op.output[0]
    weight = utils.NumpyArrayToCaffe2Tensor(
        pretrained_blobs[0].flatten(), output + '_w')
    bias = utils.NumpyArrayToCaffe2Tensor(
        pretrained_blobs[1].flatten(), output + '_b')
    caffe_op.input.extend([output + '_w', output + '_b'])
    AddArgument(caffe_op, "order", "NCHW")
    return caffe_op, [weight, bias]

@TranslatorRegistry.Register("BatchNorm")
def TranslateBatchNorm(layer, pretrained_blobs, is_test, **kwargs):
    caffe_op = BaseTranslate(layer, "SpatialBN")
    output = caffe_op.output[0]
    param = layer.batch_norm_param
    AddArgument(caffe_op, "is_test", is_test)
    AddArgument(caffe_op, "epsilon", param.eps)
    AddArgument(caffe_op, "order", "NCHW")

    caffe_op.input.extend(
        [output + "_scale",
         output + "_bias",
         output + "_mean",
         output + "_var"])
    if not is_test:
        caffe_op.output.extend(
            [output + "_mean",
             output + "_var",
             output + "_saved_mean",
             output + "_saved_var"])

    n_channels = pretrained_blobs[0].shape[0]
    if pretrained_blobs[2][0] != 0:
        mean = utils.NumpyArrayToCaffe2Tensor(
            (1. / pretrained_blobs[2][0]) * pretrained_blobs[0],
            output + '_mean')
        var = utils.NumpyArrayToCaffe2Tensor(
            (1. / pretrained_blobs[2][0]) * pretrained_blobs[1],
            output + '_var')
    else:
        raise RuntimeError("scalar is zero.")
    pretrained_blobs[2][0] = 1
    pretrained_blobs[2] = np.tile(pretrained_blobs[2], (n_channels, ))
    scale = utils.NumpyArrayToCaffe2Tensor(
        pretrained_blobs[2],
        output + '_scale')
    bias = utils.NumpyArrayToCaffe2Tensor(
        np.zeros_like(pretrained_blobs[2]),
        output + '_bias')

    return caffe_op, [scale, bias, mean, var]


@TranslatorRegistry.Register("Eltwise")
def TranslateElementWise(layer, pretrained_blobs, is_test, **kwargs):
    param = layer.eltwise_param
    # TODO(jiayq): if we have a protobuf that uses this, lift this constraint
    # and verify that we can correctly translate.
    if len(param.coeff) or param.operation != 1:
        raise RuntimeError("This eltwise layer is not yet supported.")
    caffe_op = BaseTranslate(layer, "Sum")
    return caffe_op, []


@TranslatorRegistry.Register("Scale")
def TranslateScale(layer, pretrained_blobs, is_test, **kwargs):
    mul_op = BaseTranslate(layer, "Mul")
    scale_param = layer.scale_param
    AddArgument(mul_op, "axis", scale_param.axis)
    AddArgument(mul_op, "broadcast", True)
    if len(mul_op.input) == 1:
        # the scale parameter is in pretrained blobs
        if scale_param.num_axes != 1:
            raise RuntimeError("This path has not been verified yet.")

        output = mul_op.output[0]
        mul_op_param = output + '_w'
        mul_op.input.append(mul_op_param)
        weights = []
        weights.append(utils.NumpyArrayToCaffe2Tensor(
            pretrained_blobs[0].flatten(), mul_op_param))

        add_op = None
        if len(pretrained_blobs) == 1:
            # No bias-term in Scale layer
            pass
        elif len(pretrained_blobs) == 2:
            # Caffe Scale layer supports a bias term such that it computes
            # (scale_param * X + bias), whereas Caffe2 Mul op doesn't.
            # Include a separate Add op for the bias followed by Mul.
            add_op = copy.deepcopy(mul_op)
            add_op.type = "Add"
            add_op_param = output + '_b'
            internal_blob = output + "_internal"
            del mul_op.output[:]
            mul_op.output.append(internal_blob)
            del add_op.input[:]
            add_op.input.append(internal_blob)
            add_op.input.append(add_op_param)
            weights.append(utils.NumpyArrayToCaffe2Tensor(
                pretrained_blobs[1].flatten(), add_op_param))
        else:
            raise RuntimeError("Unexpected number of pretrained blobs in Scale")

        caffe_ops = [mul_op]
        if add_op:
            caffe_ops.append(add_op)
        assert len(caffe_ops) == len(weights)
        return caffe_ops, weights
    elif len(mul_op.input) == 2:
        # TODO(jiayq): find a protobuf that uses this and verify.
        raise RuntimeError("This path has not been verified yet.")
    else:
        raise RuntimeError("Unexpected number of inputs.")


@TranslatorRegistry.Register("Reshape")
def TranslateReshape(layer, pretrained_blobs, is_test, **kwargs):
    caffe_op = BaseTranslate(layer, "Reshape")
    caffe_op.output.append("_" + caffe_op.input[0] + "_dims")
    reshape_param = layer.reshape_param
    AddArgument(caffe_op, 'shape', reshape_param.shape.dim)
    return caffe_op, []


@TranslatorRegistry.Register("Flatten")
def TranslateFlatten(layer, pretrained_blobs, is_test, **kwargs):
    param = layer.flatten_param
    if param.end_axis != -1:
        raise NotImplementedError("flatten_param.end_axis not supported yet.")

    if param.axis == 0:
        caffe_op = BaseTranslate(layer, "FlattenToVec")
    elif param.axis == 1:
        caffe_op = BaseTranslate(layer, "Flatten")
    else:
        # This could be a Reshape op, but dim size is not known here.
        raise NotImplementedError(
            "Not supported yet for flatten_param.axis {}.".format(param.axis))

    return caffe_op, []


@TranslatorRegistry.Register("Sigmoid")
def TranslateSigmoid(layer, pretrained_blobs, is_test, **kwargs):
    caffe_op = BaseTranslate(layer, "Sigmoid")
    return caffe_op, []


@TranslatorRegistry.Register("ROIPooling")
def TranslateROIPooling(layer, pretrained_blobs, is_test, **kwargs):
    caffe_op = BaseTranslate(layer, "RoIPool")
    AddArgument(caffe_op, "order", "NCHW")

    if is_test:
        AddArgument(caffe_op, "is_test", is_test)
    else:
        # Only used for gradient computation
        caffe_op.output.append(caffe_op.output[0] + '_argmaxes')

    param = layer.roi_pooling_param
    if param.HasField('pooled_h'):
        AddArgument(caffe_op, 'pooled_h', param.pooled_h)
    if param.HasField('pooled_w'):
        AddArgument(caffe_op, 'pooled_w', param.pooled_w)
    if param.HasField('spatial_scale'):
        AddArgument(caffe_op, 'spatial_scale', param.spatial_scale)

    return caffe_op, []


@TranslatorRegistry.Register("PReLU")
def TranslatePRelu(layer, pretrained_blobs, is_test, **kwargs):
    caffe_op = BaseTranslate(layer, "PRelu")
    output = caffe_op.output[0]
    caffe_op.input.extend([output + '_Slope'])
    slope = utils.NumpyArrayToCaffe2Tensor(
        pretrained_blobs[0], output + '_Slope')

    return caffe_op, [slope]


@TranslatorRegistry.Register("Reduction")
def TranslateReduction(layer, pretrained_blobs, is_test, **kwargs):
    param = layer.reduction_param
    if param.operation == caffe_pb2.ReductionParameter.SUM:
        caffe_op = BaseTranslate(layer, "ReduceBackSum")
    elif param.operation == caffe_pb2.ReductionParameter.MEAN:
        caffe_op = BaseTranslate(layer, "ReduceBackMean")
    else:
        raise NotImplementedError("Not yet supported")

    if param.axis > 0:
        # We can't figure out the number of dims to reduce from positive axis
        # for back reduction since the shape info is not known here.
        raise NotImplementedError("Not yet supported")
    num_reduce_dim = -param.axis
    AddArgument(caffe_op, "num_reduce_dim", num_reduce_dim)

    return caffe_op, []

if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description="Utility to convert pretrained Caffe models to Caffe2 "
                    "models.")
    parser.add_argument("prototext", help="Caffe prototxt file.")
    parser.add_argument("caffemodel", help="Caffe trained model.")
    parser.add_argument("--init_net", help="Caffe2 initialization net.",
                        default="init_net.pb")
    parser.add_argument("--predict_net", help="Caffe2 prediction net.",
                        default="predict_net.pb")
    parser.add_argument("--remove_legacy_pad",
                        help="Remove legacy pad "
                             "(only works for nets with one input blob).",
                        action="store_true",
                        default=False)
    parser.add_argument("--input_dims", help="Dimension of input blob",
                        nargs='+', type=int, default=[])
    args = parser.parse_args()

    caffenet = caffe_pb2.NetParameter()
    caffenet_pretrained = caffe_pb2.NetParameter()
    input_proto = args.prototext
    input_caffemodel = args.caffemodel
    output_init_net = args.init_net
    output_predict_net = args.predict_net

    text_format.Merge(
        open(input_proto, 'r').read(), caffenet
    )
    caffenet_pretrained.ParseFromString(
        open(input_caffemodel, 'rb').read()
    )
    net, pretrained_params = TranslateModel(
        caffenet, caffenet_pretrained, is_test=True,
        remove_legacy_pad=args.remove_legacy_pad,
        input_dims=args.input_dims
    )

    # Assume there is one input and one output
    external_input = net.op[0].input[0]
    external_output = net.op[-1].output[0]

    net.external_input.extend([external_input])
    net.external_input.extend(
        [param.name for param in pretrained_params.protos])
    net.external_output.extend([external_output])
    init_net = ConvertTensorProtosToInitNet(pretrained_params, external_input)

    with open(output_predict_net, 'wb') as f:
        f.write(net.SerializeToString())
    with open(output_predict_net + 'txt', 'w') as f:
        f.write(str(net))
    with open(output_init_net, 'wb') as f:
        f.write(init_net.SerializeToString())
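
Usage sketch (illustrative, not part of caffe_translator.py): the module can be run as a script on a prototxt/caffemodel pair and the resulting init/predict nets can then be loaded back with the regular caffe2.python.workspace API. The file names, the module invocation, and the 1x3x227x227 input shape below are placeholders; replace them with the actual model's values, and note that this assumes the caffe2 Python package is importable.

    # Convert a Caffe model (file names are placeholders):
    #   python -m caffe2.python.caffe_translator deploy.prototxt model.caffemodel \
    #       --init_net init_net.pb --predict_net predict_net.pb

    import numpy as np
    from caffe2.proto import caffe2_pb2
    from caffe2.python import workspace

    # Load the protobufs written by the script above.
    init_net = caffe2_pb2.NetDef()
    predict_net = caffe2_pb2.NetDef()
    with open('init_net.pb', 'rb') as f:
        init_net.ParseFromString(f.read())
    with open('predict_net.pb', 'rb') as f:
        predict_net.ParseFromString(f.read())

    # Fill the parameter blobs, feed a dummy input, and run the translated net.
    workspace.RunNetOnce(init_net)
    workspace.FeedBlob(predict_net.external_input[0],
                       np.random.randn(1, 3, 227, 227).astype(np.float32))
    workspace.CreateNet(predict_net)
    workspace.RunNet(predict_net.name)
    output = workspace.FetchBlob(predict_net.external_output[0])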