Caffe2 - Python API
A deep learning, cross-platform ML framework
model_helper.py
# Copyright (c) 2016-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

## @package model_helper
# Module caffe2.python.model_helper
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from caffe2.python import core, scope, workspace, helpers
from caffe2.python.modeling import parameter_info
from caffe2.python.modeling.parameter_sharing import (
    parameter_sharing_context,
)
from caffe2.python.optimizer_context import (
    OptimizerContext,
    DEFAULT_OPTIM,
)
from caffe2.python.regularizer_context import RegularizerContext

from future.utils import viewitems, viewkeys
from itertools import chain

import logging
import six


# _known_working_ops are operators that do not need special care.
_known_working_ops = [
    "Accuracy",
    "Adam",
    "Add",
    "Adagrad",
    "SparseAdagrad",
    "AveragedLoss",
    "Cast",
    "Checkpoint",
    "ConstantFill",
    "Copy",
    "CopyGPUToCPU",
    "CopyCPUToGPU",
    "DequeueBlobs",
    "EnsureCPUOutput",
    "ExpandDims",
    "Flatten",
    "FlattenToVec",
    "LabelCrossEntropy",
    "LearningRate",
    "MakeTwoClass",
    "MatMul",
    "NCCLAllreduce",
    "NHWC2NCHW",
    "PackSegments",
    "Print",
    "PRelu",
    "ReduceFrontSum",
    "Scale",
    "ScatterWeightedSum",
    "Sigmoid",
    "SortedSegmentSum",
    "Snapshot",  # Note: Snapshot is deprecated, use Checkpoint
    "Softmax",
    "SoftmaxWithLoss",
    "SquaredL2Distance",
    "Squeeze",
    "StopGradient",
    "Summarize",
    "Tanh",
    "Transpose",
    "UnpackSegments",
    "WeightedSum",
    "YellowFin"
]


class ModelHelper(object):
    """A helper model so we can manage models more easily. It contains the net
    definition and the parameter storages. You can add an Operator yourself, e.g.

    model = model_helper.ModelHelper(name="train_net")
    # init your weight and bias as w and b
    w = model.param_init_net.XavierFill(...)
    b = model.param_init_net.ConstantFill(...)
    fc1 = model.FC([input, w, b], output, **kwargs)

    or you can use helper functions in the brew module without manually
    defining the parameter initializations and operators:

    model = model_helper.ModelHelper(name="train_net")
    fc1 = brew.fc(model, input, output, dim_in, dim_out, **kwargs)

    """
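
    # A minimal end-to-end sketch (an editorial addition, not part of the
    # original file): it assumes the 'data' and 'label' blobs have already
    # been fed into the workspace, and uses brew plus the workspace API to
    # initialize and run the nets. Names and dimensions are illustrative.
    #
    #   from caffe2.python import brew, workspace
    #
    #   model = ModelHelper(name="train_net")
    #   fc1 = brew.fc(model, 'data', 'fc1', dim_in=784, dim_out=10)
    #   softmax, loss = model.SoftmaxWithLoss(
    #       [fc1, 'label'], ['softmax', 'loss'])
    #   model.AddGradientOperators([loss])
    #
    #   workspace.RunNetOnce(model.param_init_net)
    #   workspace.CreateNet(model.net)
    #   workspace.RunNet(model.net)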

    def __init__(self, name=None, init_params=True, allow_not_known_ops=True,
                 skip_sparse_optim=False, param_model=None, arg_scope=None):
        self.name = name or "model"
        self.net = core.Net(self.name)

        if param_model is not None:
            self.param_init_net = param_model.param_init_net
            self.param_to_grad = param_model.param_to_grad
            self.params = param_model.params
            self._parameters_info = param_model._parameters_info
            self._computed_params = param_model._computed_params
        else:
            self.param_init_net = core.Net(self.name + '_init')
            self.param_to_grad = {}
            self.params = []
            self._parameters_info = {}
            self._computed_params = []

        self._param_info_deprecated = []
        self._devices = []
        self.gradient_ops_added = False
        self.init_params = init_params
        self.allow_not_known_ops = allow_not_known_ops
        self.skip_sparse_optim = skip_sparse_optim
        self.weights = []
        self.biases = []
        self._arg_scope = {
            'order': "NCHW",
            'use_cudnn': True,
            'cudnn_exhaustive_search': False,
        }
        if arg_scope is not None:
            # Please note that None is not an acceptable value here. We do not
            # check for it because there is already a check in MakeArgument.
            self._arg_scope.update(arg_scope)

    @property
    def arg_scope(self):
        return self._arg_scope

    def get_name(self):
        return self.name

    def _infer_param_shape(self, param):
        for op in self.param_init_net.Proto().op:
            if str(param) in op.output:
                for arg in op.arg:
                    if arg.name == "shape":
                        return list(arg.ints)
        return None

    def _update_param_info_deprecated(self):
        assert len(self._param_info_deprecated) <= len(self.params)
        for param in self.params[len(self._param_info_deprecated):]:
            if not isinstance(param, core.BlobReference):
                raise ValueError(
                    "Param %s must be a BlobReference!" % str(param))
            self._param_info_deprecated.append(parameter_info.ParameterInfo(
                param_id=len(self._param_info_deprecated),
                param=param,
                shape=self._infer_param_shape(param)))
        for info in self._param_info_deprecated:
            info.grad = self.param_to_grad.get(info.name)

    def _normalize_tags(self, tags):
        tags = tags or []
        return set(tags) if isinstance(tags, list) else set([tags])

    def create_param(self, param_name, shape, initializer, tags=None):
        """
        Creates a parameter with a given name and initializer.

        If param_name is an instance of BlobReference - then this blob will be
        used to store the parameter (no logic will affect its location).

        If param_name is an instance of a string type, then the final blob will
        be created in the CurrentNameScope with respect to all parameter
        sharing logic, i.e. 'resolved_name_scope/param_name'.

        Parameter sharing logic overrides CurrentNameScope according to the
        rules that are specified through ParameterSharing contexts; all
        ParameterSharing contexts are applied recursively until there are no
        extra overrides present, and on each step the best match is applied
        first.

        The following examples should clarify the way ParameterSharing logic
        works:

        As an example, if this function is called with parameter 'w':
        a. Call from some scope 'global_scope' with no parameter sharing:
          'global_scope/w'
        b. Call from scope 'scope_b', with override {'scope_b': 'scope_a'}:
          'scope_a/w'
        c. Call from scope 'scope_a', with override {'scope_a': ''}:
          'w'
        d. Call from scope 'scope_b/shared', with overrides
          {'scope_b/shared': 'scope_b', 'scope_b': 'scope_a'}:
          'scope_a/w'
        e. Call from scope 'scope_b/unshared', with overrides
          {'scope_b/shared': 'scope_b', 'scope_b': 'scope_a'}:
          'scope_a/unshared/w'
        """
        # ParameterSharing is applied only when param_name is an instance of
        # a string type. If param_name is a BlobReference, no attempt at
        # ParameterSharing is made.
        if isinstance(param_name, core.BlobReference):
            param_name = str(param_name)
        elif isinstance(param_name, six.string_types):
            # The parameter name equals the current NameScope, resolved with
            # respect to the parameter sharing of the scopes.
            param_name = parameter_sharing_context.get_parameter_name(
                param_name)
        else:
            raise TypeError("Unsupported type for param_name")

        if param_name in self._parameters_info:
            assert self._parameters_info[param_name].shape == shape
            return self._parameters_info[param_name].blob

        param_info = initializer.create_param(
            param_name=core.BlobReference(param_name),
            init_net=self.param_init_net,
            shape=shape,
        )
        optim_context = OptimizerContext.current()
        for tag in self._normalize_tags(tags):
            if optim_context.has_optimizer(tag):
                # param_info will check that the optimizer has not been set yet
                param_info.optimizer = optim_context.get_optimizer(tag)
        if not param_info.optimizer and optim_context.has_optimizer(DEFAULT_OPTIM):
            param_info.optimizer = optim_context.get_optimizer(DEFAULT_OPTIM)

        reg_context = RegularizerContext.current()
        param_info.regularizer = reg_context

        self._parameters_info[param_name] = param_info
        # Add the param to the legacy structs as well, so all other functions
        # for parameters keep working.
        self.AddParameter(param_info.blob, tags)
        return param_info.blob
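
    # Hedged usage sketch for create_param (an editorial addition): it assumes
    # the Initializer helper from caffe2.python.modeling.initializers, which
    # wraps a fill operator such as XavierFill; the name, shape and tag below
    # are illustrative.
    #
    #   from caffe2.python.modeling.initializers import Initializer
    #
    #   w = model.create_param(
    #       param_name='fc_w',
    #       shape=[10, 4],
    #       initializer=Initializer("XavierFill"),
    #       tags=parameter_info.ParameterTags.WEIGHT,
    #   )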

    def get_param_info(self, param):
        assert isinstance(param, core.BlobReference), \
            "Param {} is not a BlobReference".format(param)
        return self._parameters_info.get(param, None)

    # This method is deprecated; use the create_param method, which
    # also does parameter initialization when needed.
    def add_param_DEPRECATED(self, param, key=None, shape=None, length=None):
        logging.warning("add_param method is DEPRECATED")
        self._update_param_info_deprecated()
        self.AddParameter(param)
        if key is not None and self.net.input_record() is not None:
            idx = self.net.input_record().field_blobs().index(key)
            key = self.net.input_record().field_names()[idx]
        shape = shape if shape is not None else self._infer_param_shape(param)
        if not isinstance(param, core.BlobReference):
            raise ValueError("Param %s must be a BlobReference!" % str(param))
        self._param_info_deprecated.append(parameter_info.ParameterInfo(
            param_id=len(self._param_info_deprecated),
            param=param,
            shape=shape,
            key=key,
            length=length,
        ))
        return self._param_info_deprecated[-1]

    # This method is deprecated; use the get_param_info method.
    def param_info(self, grad_type=None, id=None):
        logging.info("param_info method is DEPRECATED")
        self._update_param_info_deprecated()
        if id is not None:
            assert grad_type is None
            info = self._param_info_deprecated[id]
            assert info.param_id == id
            return info
        elif grad_type is not None:
            return [
                info for info in self._param_info_deprecated
                if info.grad_type() == grad_type]
        else:
            return self._param_info_deprecated

    def AddParameter(self, param, tags=None):
        assert isinstance(param, core.BlobReference)
        tags = self._normalize_tags(tags)
        if parameter_info.ParameterTags.COMPUTED_PARAM in tags:
            self._computed_params.append(param)
        else:
            self.params.append(param)

        if parameter_info.ParameterTags.WEIGHT in tags:
            self.weights.append(param)
        if parameter_info.ParameterTags.BIAS in tags:
            self.biases.append(param)

    @staticmethod
    def _NormalizeNamescope(namescope):
        if namescope is None:
            return scope.CurrentNameScope()
        elif namescope == '' or namescope.endswith(scope._NAMESCOPE_SEPARATOR):
            return namescope
        else:
            return namescope + scope._NAMESCOPE_SEPARATOR

    def GetParams(self, namescope=None, top_scope=False):
        '''
        Returns the params in the current namescope
        '''
        namescope = ModelHelper._NormalizeNamescope(namescope)

        if namescope == '':
            return self.params[:]
        elif top_scope:
            return [
                p for p in self.params
                if p.GetNameScope().startswith(namescope)
            ]
        else:
            return [p for p in self.params if
                    p.GetNameScope().startswith(namescope)]
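
    # Illustrative sketch (an assumption, not from the original file): params
    # created under a name scope can be filtered by passing that scope.
    #
    #   with scope.NameScope('tower_0'):
    #       brew.fc(model, 'data', 'fc1', dim_in=784, dim_out=10)
    #   tower_params = model.GetParams('tower_0')  # e.g. ['tower_0/fc1_w', ...]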

    def Proto(self):
        return self.net.Proto()

    def InitProto(self):
        return self.param_init_net.Proto()

    def RunAllOnGPU(self, *args, **kwargs):
        self.param_init_net.RunAllOnGPU(*args, **kwargs)
        self.net.RunAllOnGPU(*args, **kwargs)

    def CreateDB(self, blob_out, db, db_type, **kwargs):
        dbreader = self.param_init_net.CreateDB(
            [], blob_out, db=db, db_type=db_type, **kwargs)
        return dbreader

    def AddGradientOperators(self, *args, **kwargs):
        if self.gradient_ops_added:
            raise RuntimeError("You cannot run AddGradientOperators twice.")
        self.Validate()

        self.gradient_ops_added = True
        self.grad_map = self.net.AddGradientOperators(*args, **kwargs)
        self.param_to_grad = self.get_param_to_grad(self.params)

        # Populate ParameterInfo for all parameters if missing
        # and add gradient blob information, so optimizers can use it.
        for param, grad in self.param_to_grad.items():
            param_info = self.get_param_info(param)
            if param_info:
                param_info.grad = grad
            else:
                self._parameters_info[param] = parameter_info.ParameterInfo(
                    param_id=None,
                    param=param,
                    grad=grad,
                )

        return self.grad_map
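
    # Hedged example (names illustrative): after AddGradientOperators, the
    # returned grad_map and self.param_to_grad map each parameter to its
    # gradient blob, which the optimizers consume.
    #
    #   grad_map = model.AddGradientOperators([loss])
    #   for param, grad in model.param_to_grad.items():
    #       print(param, '->', grad)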

    def get_param_to_grad(self, params):
        '''
        Given a list of parameters, returns a dict mapping each parameter
        to its corresponding gradient.
        '''

        param_to_grad = {}
        if not self.gradient_ops_added:
            raise RuntimeError("You need to run AddGradientOperators first.")
        # We need to use the empty namescope when creating the gradients
        # to prevent duplicating the namescope prefix for gradient blobs.
        for p in params:
            if str(p) in self.grad_map:
                param_to_grad[p] = self.grad_map[str(p)]
        return param_to_grad

    def GetOptimizationParamInfo(self, params=None):
        '''
        Returns the optimization info (ParameterInfo) for the given params.
        If params is not specified, all parameters will be considered.
        '''
        if not self.gradient_ops_added:
            raise RuntimeError("Need to call AddGradientOperators first")

        param_to_grad = self.param_to_grad
        if params:
            param_to_grad = self.get_param_to_grad(params)

        return [
            self.get_param_info(param) for param, grad in viewitems(param_to_grad)
            if (
                not self.skip_sparse_optim or
                not isinstance(grad, core.GradientSlice)
            )
        ]

    def _Validate(self):
        '''
        Check for duplicate params
        '''
        params_list = [str(p) for p in self.params]
        params_set = set(params_list)

        dupes = []
        if len(params_set) != len(params_list):
            params_list = sorted(params_list)
            for j, p in enumerate(params_list):
                if j > 0 and params_list[j - 1] == p:
                    if p not in dupes:
                        dupes.append(p)

        return dupes

    def Validate(self):
        dupes = self._Validate()
        assert dupes == [], "Duplicate params: {}".format(dupes)

    def GetComputedParams(self, namescope=None):
        '''
        Returns the computed params in the current namescope. 'Computed params'
        are parameters that are not optimized via gradient descent but are
        computed directly from data, such as the running mean and variance
        of Spatial Batch Normalization.
        '''
        namescope = ModelHelper._NormalizeNamescope(namescope)

        if namescope == '':
            return self._computed_params[:]
        else:
            return [p for p in self._computed_params
                    if p.GetNameScope().startswith(namescope)]
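
    # Hedged example (assumes brew.spatial_bn and its usual blob naming, e.g.
    # a running-mean blob suffixed '_rm'; verify against your Caffe2 version):
    #
    #   brew.spatial_bn(model, 'conv1', 'bn1', dim_in=64, is_test=False)
    #   model.GetComputedParams()  # includes the running mean/variance blobs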

    def GetAllParams(self, namescope=None):
        return self.GetParams(namescope) + self.GetComputedParams(namescope)

    def TensorProtosDBInput(
        self, unused_blob_in, blob_out, batch_size, db, db_type, **kwargs
    ):
        """TensorProtosDBInput."""
        assert len(unused_blob_in) == 0, \
            """You cannot pass a reader to model_helper.TensorProtosDBInput.
               Use model.net.TensorProtosDBInput instead to create the op."""

        return helpers.db_input.db_input(
            self, blob_out, batch_size, db, db_type, **kwargs)
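
    # Hedged usage sketch (the db path and blob names are illustrative):
    #
    #   data, label = model.TensorProtosDBInput(
    #       [], ['data', 'label'], batch_size=64,
    #       db='/path/to/train.lmdb', db_type='lmdb')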

    def GetDevices(self):
        assert len(self._devices) > 0, \
            "Use data_parallel_model to run model on multiple GPUs."
        return self._devices

    def __getattr__(self, op_type):
        """Catch-all for all other operators, mostly those without params."""
        if op_type.startswith('__'):
            raise AttributeError(op_type)

        if not core.IsOperator(op_type):
            raise AttributeError(
                'Method ' + op_type + ' is not a registered operator.' +
                ' Did you mean: [' +
                ','.join(workspace.C.nearby_opnames(op_type)) + ']'
            )
        if op_type not in _known_working_ops:
            if not self.allow_not_known_ops:
                raise AttributeError(
                    "Operator {} is not known to be safe".format(op_type))

            logging.warning("You are creating an op that the ModelHelper "
                            "does not recognize: {}.".format(op_type))
        return self.net.__getattr__(op_type)

    def __dir__(self):
        return sorted(set(chain(
            dir(type(self)),
            viewkeys(self.__dict__),
            _known_working_ops
        )))
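
    # Hedged example of the __getattr__ catch-all (blob names illustrative):
    # any registered Caffe2 operator can be added straight onto model.net.
    #
    #   out = model.Copy('fc1', 'fc1_copy')  # known-safe op, no warning
    #   out = model.Relu('fc1', 'fc1_relu')  # not in _known_working_ops, logs a warning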


def ExtractPredictorNet(
    net_proto,
    input_blobs,
    output_blobs,
    device=None,
    renames=None,
    disabled_inputs=None,
):
    '''
    Takes a model net for training and returns a net which can be
    used for prediction. For example, all gradient operators and
    input operators are removed.
    @param net_proto protobuf of the net you want to process (net.Proto())
    @param input_blobs list/set of blob names that are the inputs of predictor
    @param output_blobs list/set of blob names that are outputs of predictor
    @param device optional device option that is assigned
    @param renames dictionary of blob name to a new name (optional)
    @param disabled_inputs optional set of blobs that are 'switched off'. This
           will cause branches with those blobs as inputs to be removed
    '''
    predict_net = core.Net(net_proto.name + "_predict")
    predict_proto = predict_net.Proto()

    orig_external_inputs = set(net_proto.external_input)
    orig_external_outputs = set(net_proto.external_output)
    input_blobs = {str(b) for b in input_blobs}
    known_blobs = set(orig_external_inputs).union(input_blobs)
    output_blobs = {str(b) for b in output_blobs}
    external_inputs = set(input_blobs)
    external_outputs = set(output_blobs)

    if renames is None:
        renames = {}

    if disabled_inputs is not None:
        known_blobs = known_blobs - set(disabled_inputs)

    ops = list(net_proto.op)

    # Find the range of ops that we should include
    try:
        first_op_with_input = min(
            [
                j for j in range(len(ops))
                if input_blobs.intersection(ops[j].input) and ops[j].type !=
                'StopGradient'
            ]
        )
    except ValueError:
        raise Exception("No ops with input={}".format(input_blobs))
    try:
        last_op_with_output = max(
            [
                j for j in range(len(ops))
                if output_blobs.intersection(ops[j].output)
            ]
        )
    except ValueError:
        raise Exception("No ops with output={}".format(output_blobs))

    def validate_op(op):
        # Check that the op does not have is_test = 0 set. This is a common
        # pitfall with the SpatialBN op, at least.
        for arg in op.arg:
            if arg.name == "is_test" and arg.i == 0:
                raise Exception(
                    "An operator had is_test=0, did you try to extract a " +
                    "predictor from a train model (instead of test model)?" +
                    " Op was: {}".format(str(op))
                )

    def rename_list(proto_list):
        # proto lists don't support assignments
        new_list = proto_list[:]
        for j, b in enumerate(new_list):
            if b in renames:
                new_list[j] = renames[b]

        del proto_list[:]
        proto_list.extend(new_list)

    # Iterate through the ops and only include those whose inputs
    # we can satisfy.
    for op in ops[first_op_with_input:(last_op_with_output + 1)]:
        if known_blobs.issuperset(op.input):

            # Special handling for recurrent nets
            # TODO: when standard argument type for "nets" is introduced,
            # this can be more general
            if op.type == 'RecurrentNetwork':
                for arg in op.arg:
                    if arg.name == 'backward_step_net':
                        arg.ClearField(str('n'))
                    elif arg.name == 'step_net':
                        for step_op in arg.n.op:
                            rename_list(step_op.input)
                            rename_list(step_op.output)
                            if device is not None:
                                step_op.device_option.device_type = device.device_type
                                step_op.device_option.cuda_gpu_id = device.cuda_gpu_id

                        rename_list(arg.n.external_input)
                        rename_list(arg.n.external_output)

                        # Add additional external inputs
                        external_inputs.update(
                            set(arg.n.external_input).intersection(
                                orig_external_inputs
                            )
                        )

            if device is not None:
                op.device_option.device_type = device.device_type
                op.device_option.cuda_gpu_id = device.cuda_gpu_id
            validate_op(op)
            predict_proto.op.extend([op])
            known_blobs.update(op.output)
            external_inputs.update(
                set(op.input).intersection(orig_external_inputs)
            )
            external_outputs.update(
                set(op.output).intersection(orig_external_outputs)
            )

        else:
            logging.debug(
                "Op {} had unknown inputs: {}".format(
                    op.type, set(op.input).difference(known_blobs)
                )
            )

    # Predictor net's external inputs and outputs include only those
    # that are part of this net.
    predict_proto.external_input.extend(external_inputs)
    predict_proto.external_output.extend(external_outputs)

    rename_list(predict_proto.external_input)
    rename_list(predict_proto.external_output)

    renamed_input_blobs = []
    for b in input_blobs:
        if b in renames:
            renamed_input_blobs.append(renames[b])
        else:
            renamed_input_blobs.append(b)

    for op in predict_proto.op:
        rename_list(op.input)
        rename_list(op.output)

    return predict_net, list(
        set(predict_proto.external_input) - set(renamed_input_blobs)
    )
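

# Hedged usage sketch for ExtractPredictorNet (blob names are illustrative;
# it assumes a trained model whose net reads 'data' and produces 'softmax'):
#
#   predict_net, extra_inputs = ExtractPredictorNet(
#       net_proto=train_model.net.Proto(),
#       input_blobs=['data'],
#       output_blobs=['softmax'],
#       renames={'softmax': 'prediction'},
#   )
#   workspace.CreateNet(predict_net)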