Caffe2 - Python API
A deep learning, cross platform ML framework
layer_model_helper.py
1 # @package layer_model_helper
2 # Module caffe2.python.layer_model_helper
3 from __future__ import absolute_import
4 from __future__ import division
5 from __future__ import print_function
6 from __future__ import unicode_literals
7 
8 from caffe2.python import core, model_helper, schema, scope, utils, muji
9 from caffe2.python.modeling.parameter_info import (
10  ParameterInfo,
11 )
12 from caffe2.python.modeling.parameter_sharing import (
13  parameter_sharing_context,
14 )
15 from caffe2.python.modeling.net_modifier import NetModifier
16 
17 from caffe2.python.optimizer import get_param_device
18 from caffe2.python.regularizer import Regularizer, RegularizationBy
19 from caffe2.python.layers import layers
20 from caffe2.proto import caffe2_pb2
21 from future.utils import viewitems, viewvalues
22 
23 import logging
24 import numpy as np
25 import six
26 import copy
27 logger = logging.getLogger(__name__)
28 
29
30 class LayerModelHelper(model_helper.ModelHelper):
31  """
32  Model helper for building models on top of layers abstractions.
33 
34  Each layer is an abstraction at a higher level than an Operator. A layer
35  is responsible for ownership of its own parameters and can easily be
36  instantiated in multiple nets, possibly with different sets of ops.
37  As an example: one can easily instantiate predict and train nets from
38  the same set of layers, where the predict net will have a subset of the
39  operators from the train net.
40  """
41 
42  def __init__(self, name, input_feature_schema, trainer_extra_schema,
43  keep_blobs=False):
44  ''' TODO(amalevich): more documentation on input args
45  '''
46 
47  super(LayerModelHelper, self).__init__(name=name)
48  self._layer_names = set()
49  self._layers = []
50  self._param_to_shape = {}
51 
52  # seed default
53  self._seed = None
54  self._sequence_seed = True
55 
56  # optimizer bookkeeping
57  self.param_to_optim = {}
58  self.param_to_reg = {}
59 
60  self._default_optimizer = None
61  self._loss = None
62  self._prediction = []
63  self._output_schema = None
64 
65  self._post_grad_net_modifiers = []
66  self._final_net_modifiers = []
67 
68  # breakdown map; breakdown features are categorical (like dense) but not
69  # necessarily used to represent data for training
70  self._breakdown_map = None
71 
72  # Connect Schema to self.net. That particular instance of schema will be
73  # used for generation of the Layers across the network and would be used
74  # for connection with Readers.
75  self._input_feature_schema = schema.NewRecord(
76  self.net,
77  input_feature_schema
78  ) if not keep_blobs else input_feature_schema.clone()
79  self._trainer_extra_schema = schema.NewRecord(
80  self.net,
81  trainer_extra_schema
82  ) if not keep_blobs else trainer_extra_schema.clone()
83  self._metrics_schema = schema.Struct()
84
85  self._preproc_output_schema = None
86
87  self._init_global_constants()
88  self.param_init_net = self.create_init_net('param_init_net')
89  self._initialize_params = True
90 
91  # additional (hard-coded) diagnose_options to report based on the model
92  # TODO(xlwang): it's a hack!
93  self.ad_hoc_diagnose_blobs_and_operations = []
94  self.ad_hoc_plot_blobs = []
95 
96  def clear_output_schema(self):
97  self._output_schema = None
98 
99  def set_initialize_params(self, initialize_params):
100  self._initialize_params = initialize_params
101 
102  def add_metric_field(self, name, value):
103  assert name not in self._metrics_schema.fields, (
104  "Try to add metric field twice: {}".format(name))
105  self._metrics_schema = self._metrics_schema + schema.Struct(
106  (name, value)
107  )
108 
109  def add_ad_hoc_plot_blob(self, blob, dtype=None):
110  assert isinstance(
111  blob, (six.string_types, core.BlobReference)
112  ), "expect type str or BlobReference, but got {}".format(type(blob))
113  dtype = dtype or (np.float, (1, ))
114  self.add_metric_field(str(blob), schema.Scalar(dtype, blob))
115  self.ad_hoc_plot_blobs.append(blob)
116 
117  @staticmethod
118  def _get_global_constant_initializer_op(
119  blob_name, array=None, dtype=None, initializer=None
120  ):
121  # to add a global constant to the model, one first needs to get the
122  # initializer
123  if array is not None:
124  assert initializer is None,\
125  "Only one from array and initializer should be specified"
126  if dtype is None:
127  array = np.array(array)
128  else:
129  array = np.array(array, dtype=dtype)
130 
131  # TODO: make GivenTensor generic
132  op_name = None
133  if array.dtype == np.int32:
134  op_name = 'GivenTensorIntFill'
135  elif array.dtype == np.int64:
136  op_name = 'GivenTensorInt64Fill'
137  elif array.dtype == np.str:
138  op_name = 'GivenTensorStringFill'
139  elif array.dtype == np.bool:
140  op_name = 'GivenTensorBoolFill'
141  else:
142  op_name = 'GivenTensorFill'
143 
144  def initializer(blob_name):
145  return core.CreateOperator(
146  op_name, [],
147  blob_name,
148  shape=array.shape,
149  values=array.flatten().tolist()
150  )
151  else:
152  assert initializer is not None
153  initializer_op = initializer(blob_name)
154  return initializer_op
155 
156  def add_global_constant(
157  self, name, array=None, dtype=None, initializer=None
158  ):
159  assert isinstance(name, six.string_types), (
160  'name should be a string as we are using it as map key')
161  # This is a global namescope for constants. They will be created in all
162  # init_nets and there should be very few of them.
163  assert name not in self.global_constants, \
164  "%s already added in global_constants" % name
165  blob_name = self.net.NextBlob(name)
166  self.global_constants[name] = blob_name
167  initializer_op = LayerModelHelper._get_global_constant_initializer_op(
168  blob_name, array, dtype, initializer
169  )
170  assert blob_name not in self.global_constant_initializers, \
171  "there is already an initializer op associated with blob %s" % \
172  blob_name
173  self.global_constant_initializers[blob_name] = initializer_op
174  return blob_name
175 
176  def maybe_add_global_constant(self, name, *args, **kwargs):
177  # Ad hoc addition of new global constants without duplication:
178  # if the name was already registered in global_constants, it will not be
179  # added even if the intended value is different from its original value
180 
181  if name in self.global_constants:
182  blob_name = self.global_constants[name]
183  initializer_op = \
184  LayerModelHelper._get_global_constant_initializer_op(
185  blob_name, *args, **kwargs
186  )
187  # check if the original initializer is the same as the one intended
188  # now
189  assert utils.OpAlmostEqual(
190  initializer_op,
191  self.global_constant_initializers[blob_name],
192  'debug_info'
193  ), \
194  "conflict initializers for global constant %s, " \
195  "previous %s, now %s" % (
196  blob_name, str(initializer_op),
197  str(self.global_constant_initializers[blob_name]))
198  return blob_name
199  return self.add_global_constant(name, *args, **kwargs)
200 
201  def _init_global_constants(self):
202  self.global_constants = {}
203  self.global_constant_initializers = {}
204  self.add_global_constant('ONE', 1.0)
205  self.add_global_constant('ZERO', 0.0)
206  self.add_global_constant('ZERO_RANGE', [0, 0], dtype='int32')
207 
208  def _add_global_constants(self, init_net):
209  for initializer_op in viewvalues(self.global_constant_initializers):
210  init_net._net.op.extend([initializer_op])
211 
212  def create_init_net(self, name):
213  init_net = core.Net(name)
214  self._add_global_constants(init_net)
215  return init_net
216 
217  def _validate_param_shape(self, param_name, shape):
218  if param_name not in self._param_to_shape:
219  return
220 
221  ref_shape = self._param_to_shape[param_name]
222 
223  if shape != ref_shape:
224  raise ValueError(
225  "Got inconsistent shapes between shared parameters "
226  "when trying to map a blob in scope {0} to {1}. ref_shape : "
227  " {2}, shape : {3}".format(
228  scope.CurrentNameScope(), param_name, ref_shape, shape)
229  )
230 
231  def create_param(self, param_name, shape, initializer, optimizer=None,
232  ps_param=None, regularizer=None):
233  if isinstance(param_name, core.BlobReference):
234  param_name = str(param_name)
235  elif isinstance(param_name, six.string_types):
236  # Parameter name will be equal to the current Namescope that got
237  # resolved with respect to the parameter sharing of the scopes.
238  param_name = parameter_sharing_context.get_parameter_name(
239  param_name)
240  else:
241  raise ValueError("Unsupported type for param_name")
242 
243  param_blob = core.BlobReference(param_name)
244 
245  if len(initializer) == 1:
246  init_op_args = {}
247  else:
248  assert len(initializer) == 2
249  init_op_args = copy.deepcopy(initializer[1])
250  if shape is not None:
251  assert 'shape' not in init_op_args
252  init_op_args.update({'shape': shape})
253 
254  initializer_op = None
255  if self._initialize_params:
256  initializer_op = core.CreateOperator(
257  initializer[0],
258  [],
259  param_blob,
260  **init_op_args
261  )
262 
263  param = layers.LayerParameter(
264  parameter=param_blob,
265  initializer=initializer_op,
266  optimizer=optimizer,
267  ps_param=ps_param,
268  regularizer=regularizer
269  )
270 
271  self._validate_param_shape(param_name, shape)
272 
273  self._param_to_shape[param_name] = shape
274 
275  return param
276 
277  def next_layer_name(self, prefix):
278  base_name = core.ScopedName(prefix)
279  name = base_name
280  index = 0
281  while name in self._layer_names:
282  name = base_name + '_auto_' + str(index)
283  index += 1
284 
285  self._layer_names.add(name)
286  return name
287 
288  def add_layer(self, layer):
289  self._layers.append(layer)
290  for param in layer.get_parameters():
291  assert isinstance(param.parameter, core.BlobReference)
292 
293  self.param_to_optim[str(param.parameter)] = \
294  param.optimizer or self.default_optimizer
295 
296  self.params.append(param.parameter)
297  if isinstance(param, layers.LayerParameter):
298  self.param_to_reg[param.parameter] = param.regularizer
299  elif isinstance(param, ParameterInfo):
300  # TODO:
301  # Currently, LSTM and RNNcells, which use ModelHelper instead of
302  # LayerModelHelper as the super class, are called in pooling_methods.
303  # In ModelHelper, regularization is not supported in create_param.
304  # We will unify the way of create_param of ModelHelper and
305  # LayerModelHelper in the future.
306  logger.info('regularization is unsupported for ParameterInfo object')
307  else:
308  raise ValueError(
309  'unknown object type besides ParameterInfo and LayerParameter: {}'
310  .format(param)
311  )
312 
313  # The primary value of adding everything to self.net is generation of the
314  # operators right away, i.e. if an error happens it'll be detected
315  # immediately. Other than this, create_x_net should be called.
316  layer.add_operators(self.net, self.param_init_net)
317  return layer.output_schema
318 
319  def get_parameter_blobs(self):
320  param_blobs = []
321  for layer in self._layers:
322  for param in layer.get_parameters():
323  param_blobs.append(param.parameter)
324 
325  return param_blobs
326 
327  def add_post_grad_net_modifiers(self, modifier):
328  assert modifier not in self._post_grad_net_modifiers,\
329  "{0} is already in {1}".format(modifier, self._post_grad_net_modifiers)
330  assert isinstance(modifier, NetModifier),\
331  "{} has to be a NetModifier instance".format(modifier)
332  self._post_grad_net_modifiers.append(modifier)
333 
334  def add_final_net_modifiers(self, modifier):
335  assert modifier not in self._final_net_modifiers,\
336  "{0} is already in {1}".format(modifier, self._final_net_modifiers)
337  assert isinstance(modifier, NetModifier),\
338  "{} has to be a NetModifier instance".format(modifier)
339  self._final_net_modifiers.append(modifier)
340 
341  @property
342  def seed(self):
343  return self._seed
344 
345  @property
346  def sequence_seed(self):
347  return self._sequence_seed
348 
349  def store_seed(self, seed, sequence_seed=True):
350  # Store seed config that will be applied to each op in the net.
351  self._seed = seed
352  # If sequence_seed is True, the i-th op has rand_seed=`seed + i`
353  self._sequence_seed = sequence_seed
354 
355  def apply_seed(self, net):
356  if self._seed:
357  net.set_rand_seed(self._seed, self._sequence_seed)
358 
359  @property
360  def default_optimizer(self):
361  return self._default_optimizer
362 
363  @default_optimizer.setter
364  def default_optimizer(self, optimizer):
365  self._default_optimizer = optimizer
366 
367  @property
368  def input_feature_schema(self):
369  return self._input_feature_schema
370 
371  @property
372  def trainer_extra_schema(self):
373  return self._trainer_extra_schema
374 
375  @property
376  def metrics_schema(self):
377  """
378  Returns the schema that represents model output that should be used for
379  metric reporting.
380 
381  During the training/evaluation this schema will be appended to the
382  schema that represents model output.
383  """
384  return self._metrics_schema
385 
386  @property
387  def output_schema(self):
388  assert self._output_schema is not None
389  return self._output_schema
390 
391  @output_schema.setter
392  def output_schema(self, schema):
393  assert self._output_schema is None
394  self._output_schema = schema
395 
396  @property
397  def preproc_output_schema(self):
398  assert self._preproc_output_schema is not None
399  return self._preproc_output_schema
400 
401  @preproc_output_schema.setter
402  def preproc_output_schema(self, schema):
403  assert self._preproc_output_schema is None
404  self._preproc_output_schema = schema
405 
406  @property
407  def prediction(self):
408  assert self._prediction, "model prediction is empty"
409  return self._prediction
410 
411  def add_prediction(self, prediction, weight=1.0):
412  assert prediction is not None, "Added prediction should not be None"
413  self._prediction.append((prediction, weight))
414 
415  @property
416  def loss(self):
417  assert self._loss is not None
418  return self._loss
419 
420  @loss.setter
421  def loss(self, loss):
422  assert self._loss is None
423  self._loss = loss
424 
425  def has_loss(self):
426  return self._loss is not None
427 
428  def add_loss(self, loss, name='unnamed'):
429  assert loss is not None, "Added loss should not be None"
430  assert isinstance(loss, schema.Scalar) or isinstance(
431  loss, schema.Struct
432  ), "Added loss should be a scalar or a struct"
433  if self._loss is None:
434  self._loss = schema.Struct((name, loss))
435  else:
436  # loss could've been set through model.loss directly which could be
437  # a scalar
438  if isinstance(self._loss, schema.Scalar):
439  self._loss = schema.Struct(('unnamed', self._loss))
440 
441  prefix_base = name + '_auto_'
442  index = 0
443  prefix = name
444  while prefix in self._loss:
445  prefix = prefix_base + str(index)
446  index += 1
447  loss_struct = schema.Struct((prefix, loss))
448  self._loss = self._loss + loss_struct
449 
450  def add_output_schema(self, name, value):
451  assert value is not None, \
452  'Added output schema {} should not be None'.format(name)
453  assert isinstance(value, schema.Scalar) or \
454  isinstance(value, schema.Struct), \
455  'Added output schema {} should be a scalar or a struct.\n\
456  Now it is {}.'.format(name, type(value))
457  if self._output_schema is None: # be the first field
458  self._output_schema = schema.Struct((name, value))
459  else: # merge with other fields
460  assert name not in self._output_schema.fields, \
461  'Output Schema Field {} already exists'.format(name)
462  self._output_schema = \
463  self._output_schema + schema.Struct((name, value))
464 
465  def add_trainer_extra_schema(self, trainer_extra_schema):
466  trainer_extra_record = schema.NewRecord(self.net, trainer_extra_schema)
467  self._trainer_extra_schema += trainer_extra_record
468 
469  def __getattr__(self, layer):
470  def is_functional_layer(layer):
471  if core.IsOperator(layer):
472  return True
473  elif layer.startswith('FunctionalLayer'):
474  return True
475  else:
476  return False
477 
478  def resolve_functional_layer(layer):
479  if core.IsOperator(layer):
480  return layer
481  elif layer.startswith('FunctionalLayer'):
482  return layer[len('FunctionalLayer'):]
483  else:
484  raise ValueError(
485  '%s cannot be resolved as functional layer' % layer
486  )
487 
488  if layer.startswith('__'):
489  raise AttributeError(layer)
490 
491  # TODO(amalevich): Add support for ifbpy inline documentation
492  if layers.layer_exists(layer):
493  def wrapper(*args, **kwargs):
494  new_layer = layers.create_layer(layer, self, *args, **kwargs)
495  if kwargs.get("output_to_metrics", False):
496  new_layer.export_output_for_metrics()
497  if kwargs.get("params_to_metrics", False):
498  new_layer.export_params_for_metrics()
499  return self.add_layer(new_layer)
500  return wrapper
501  elif is_functional_layer(layer):
502  # TODO(xlwang): Designated layer shadows the usage of an op as a
503  # single layer. To enforce using an op (e.g. Split) as functional
504  # layer, one can call 'model.FunctionalLayerSplit'
505  layer = resolve_functional_layer(layer)
506 
507  def wrapper(*args, **kwargs):
508  def apply_operator(net, in_record, out_record, **kwargs):
509  # TODO(amalevich): Switch to net.operator as soon as it gets
510  # landed
511  net.__getattr__(layer)(in_record.field_blobs(),
512  out_record.field_blobs(),
513  **kwargs)
514 
515  if 'name' not in kwargs:
516  kwargs['name'] = layer
517 
518  new_layer = layers.create_layer(
519  'Functional',
520  self, *args, function=apply_operator,
521  **kwargs
522  )
523 
524  if kwargs.get("output_to_metrics", False):
525  new_layer.export_output_for_metrics()
526  if kwargs.get("params_to_metrics", False):
527  new_layer.export_params_for_metrics()
528 
529  return self.add_layer(new_layer)
530  return wrapper
531  else:
532  # this needs to be an AttributeError to fit hasattr semantics
533  raise AttributeError(
534  "Trying to create non-registered layer: {}".format(layer))
535 
536  @property
537  def layers(self):
538  return self._layers
539 
540  def apply_regularizers_on_loss(
541  self,
542  train_net,
543  train_init_net,
544  blob_to_device=None,
545  ):
546  for param, regularizer in viewitems(self.param_to_reg):
547  if regularizer is None:
548  continue
549  assert isinstance(regularizer, Regularizer)
550  added_loss_blob = regularizer(train_net, train_init_net, param, grad=None,
551  by=RegularizationBy.ON_LOSS)
552  if added_loss_blob is not None:
553  self.add_loss(
554  schema.Scalar(blob=added_loss_blob),
555  str(added_loss_blob)
556  )
557 
558  def apply_regularizers_after_optimizer(
559  self,
560  train_net,
561  train_init_net,
562  grad_map,
563  blob_to_device=None,
564  ):
565  CPU = muji.OnCPU()
566  # if given, blob_to_device is a map from blob to device_option
567  blob_to_device = blob_to_device or {}
568  for param, regularizer in viewitems(self.param_to_reg):
569  if regularizer is None:
570  continue
571  assert isinstance(regularizer, Regularizer)
572  device = get_param_device(
573  param,
574  grad_map.get(str(param)),
575  param_to_device=blob_to_device,
576  default_device=CPU,
577  )
578  with core.DeviceScope(device):
579  regularizer(
580  train_net, train_init_net, param, grad=grad_map.get(str(param)),
581  by=RegularizationBy.AFTER_OPTIMIZER
582  )
583 
584  def apply_post_grad_net_modifiers(
585  self,
586  trainer_net,
587  trainer_init_net,
588  grad_map,
589  blob_to_device=None,
590  modify_output_record=False,
591  ):
592  param_grad_map = {param: grad_map[param]
593  for param in self.param_to_optim.keys() if param in grad_map}
594 
595  for modifier in self._post_grad_net_modifiers:
596  modifier(trainer_net, trainer_init_net, param_grad_map,
597  blob_to_device=blob_to_device,
598  modify_output_record=modify_output_record)
599 
600  def apply_final_net_modifiers(
601  self,
602  trainer_net,
603  trainer_init_net,
604  grad_map,
605  blob_to_device=None,
606  modify_output_record=False,
607  ):
608  for modifier in self._final_net_modifiers:
609  modifier(trainer_net, trainer_init_net, grad_map,
610  blob_to_device=blob_to_device,
611  modify_output_record=modify_output_record)
612 
613  def apply_optimizers(
614  self,
615  train_net,
616  train_init_net,
617  grad_map,
618  blob_to_device=None,
619  ):
620  CPU = muji.OnCPU()
621  # if given, blob_to_device is a map from blob to device_option
622  blob_to_device = blob_to_device or {}
623  for param, optimizer in viewitems(self.param_to_optim):
624  assert optimizer is not None, \
625  "default optimizer must have been set in add_layer"
626  # note that not all params have a gradient and thus we send None if
627  # the gradient does not exist
628  device = get_param_device(
629  param,
630  grad_map.get(str(param)),
631  param_to_device=blob_to_device,
632  default_device=CPU,
633  )
634  if device is not None:
635  # extra info is not applicable for optimizers
636  del device.extra_info[:]
637 
638  with core.DeviceScope(device):
639  optimizer(
640  train_net, train_init_net, param, grad_map.get(str(param)))
641 
642  def _GetOne(self):
643  return self.global_constants['ONE']
644 
645  # An optimizer which allows us to do NO optimization
646  def NoOptim(self, *args, **kwargs):
647  pass
648 
649  @property
650  def breakdown_map(self):
651  return self._breakdown_map
652 
653  @breakdown_map.setter
654  def breakdown_map(self, breakdown_map):
655  # TODO(xlwang): provide richer feature information in breakdown_map;
656  # and change the assertion accordingly
657  assert isinstance(breakdown_map, dict)
658  assert all(isinstance(k, six.string_types) for k in breakdown_map)
659  assert sorted(breakdown_map.values()) == list(range(len(breakdown_map)))
660  self._breakdown_map = breakdown_map
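
The listing ends here; to make the abstractions above concrete, here is a minimal usage sketch. It is not part of layer_model_helper.py: the 10-dimensional dense input, the FC layer, and the AdagradOptimizer choice are illustrative assumptions, but the calls mirror the methods defined above (the constructor, input_feature_schema, default_optimizer, the layer dispatch in __getattr__, and the output_schema setter).

# Hypothetical usage sketch (not part of layer_model_helper.py).
import numpy as np

from caffe2.python import schema
from caffe2.python.layer_model_helper import LayerModelHelper
from caffe2.python.optimizer import AdagradOptimizer

# Input record read by the model; the 10-dim dense feature is an assumption.
input_record = schema.Struct(
    ('dense', schema.Scalar((np.float32, (10, )))),
)
# Extra trainer-only inputs (labels, weights, ...); left empty for brevity.
trainer_extra = schema.Struct()

model = LayerModelHelper('example_model', input_record, trainer_extra)
# Used for every parameter whose layer does not specify its own optimizer.
model.default_optimizer = AdagradOptimizer(alpha=0.01)

# Attribute access is routed through __getattr__: 'FC' resolves to the
# registered FC layer, add_layer() is called, and the layer's output schema
# is returned.
fc_output = model.FC(model.input_feature_schema.dense, 8)
model.output_schema = fc_output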
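
From the same set of layers, concrete nets can then be instantiated, which is what the class docstring means by predict and train nets sharing one set of layers. The continuation below assumes the companion caffe2.python.layer_model_instantiator module; only the predict path is shown, since a training net would additionally require a loss via model.add_loss(...) and gradient/optimizer application.

# Hypothetical continuation of the sketch above.
import numpy as np

from caffe2.python import layer_model_instantiator, schema, workspace

# The predict net keeps only the subset of operators needed at inference.
predict_net = layer_model_instantiator.generate_predict_net(model)

# Feed a dummy batch for the declared input record (batch size 1 assumed).
schema.FeedRecord(model.input_feature_schema,
                  [np.zeros((1, 10), dtype=np.float32)])
# param_init_net holds the parameter initializers added by the layers, plus
# the global constants (ONE, ZERO, ZERO_RANGE).
workspace.RunNetOnce(model.param_init_net)
workspace.RunNetOnce(predict_net)
output = schema.FetchRecord(model.output_schema)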