Caffe2 - Python API
A deep learning, cross-platform ML framework
layer_model_helper.py
1 # Copyright (c) 2016-present, Facebook, Inc.
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14 ##############################################################################
15 
16 ## @package layer_model_helper
17 # Module caffe2.python.layer_model_helper
18 from __future__ import absolute_import
19 from __future__ import division
20 from __future__ import print_function
21 from __future__ import unicode_literals
22 
23 from caffe2.python import core, model_helper, schema, scope
24 from caffe2.python.modeling.parameter_info import (
25  ParameterInfo,
26 )
27 from caffe2.python.modeling.parameter_sharing import (
28  parameter_sharing_context,
29 )
30 from caffe2.python.optimizer import get_param_device
31 from caffe2.python.regularizer import Regularizer
32 from caffe2.python.layers import layers
33 from caffe2.proto import caffe2_pb2
34 from future.utils import viewitems, viewvalues
35 
36 import logging
37 import numpy as np
38 import six
39 import copy
40 logger = logging.getLogger(__name__)
41 
42 
43 class LayerModelHelper(model_helper.ModelHelper):
44  """
45  Model helper for building models on top of layers abstractions.
46 
47  Each layer is an abstraction at a higher level than an Operator. A layer
48  is responsible for the ownership of its own parameters and can easily be
49  instantiated in multiple nets, possibly with different sets of ops.
50  As an example: one can easily instantiate predict and train nets from
51  the same set of layers, where the predict net will have a subset of the
52  operators from the train net.
53  """
54 
55  def __init__(self, name, input_feature_schema, trainer_extra_schema,
56  keep_blobs=False):
57  ''' TODO(amalevich): more documentation on input args
58  '''
59 
60  super(LayerModelHelper, self).__init__(name=name)
61  self._layer_names = set()
62  self._layers = []
63  self._param_to_shape = {}
64 
65  # seed default
66  self._seed = None
67  self._sequence_seed = True
68 
69  # optimizer bookkeeping
70  self.param_to_optim = {}
71  self.param_to_reg = {}
72 
73  self._default_optimizer = None
74  self._loss = None
75  self._output_schema = None
76 
77  # breakdown map; breakdown features are categorical (like dense) but not
78  # necessarily used to represent data for training
79  self._breakdown_map = None
80 
81  # Connect Schema to self.net. That particular instance of schema will be
82  # used for generation of the Layers across the network and would be used
83  # for connection with Readers.
84  self._input_feature_schema = schema.NewRecord(
85  self.net,
86  input_feature_schema
87  ) if not keep_blobs else input_feature_schema.clone()
88  self._trainer_extra_schema = schema.NewRecord(
89  self.net,
90  trainer_extra_schema
91  ) if not keep_blobs else trainer_extra_schema.clone()
92  self._metrics_schema = schema.Struct()
93 
94  self._init_global_constants()
95  self.param_init_net = self.create_init_net('param_init_net')
96  self._initialize_params = True
97 
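
Editor's usage sketch (not part of the source): constructing a LayerModelHelper only needs a name plus input-feature and trainer-extra schemas. The field names, shapes, and model name below are made up for illustration.

    import numpy as np
    from caffe2.python import schema
    from caffe2.python.layer_model_helper import LayerModelHelper

    # Hypothetical dense input and label records.
    input_record = schema.Struct(
        ('dense', schema.Scalar((np.float32, (10, )))),
    )
    trainer_extra = schema.Struct(
        ('label', schema.Scalar((np.float32, (1, )))),
    )
    model = LayerModelHelper('sample_model', input_record, trainer_extra)
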
98  def clear_output_schema(self):
99  self._output_schema = None
100 
101  def set_initialize_params(self, initialize_params):
102  self._initialize_params = initialize_params
103 
104  def add_metric_field(self, name, value):
105  assert name not in self._metrics_schema.fields, (
106  "Try to add metric field twice: {}".format(name))
107  self._metrics_schema = self._metrics_schema + schema.Struct(
108  (name, value)
109  )
110 
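
A hedged sketch of attaching a metric record (the field name and blob are hypothetical; `model`, `schema`, and `np` are the ones from the construction sketch above):

    # Expose a blob for metric reporting; it gets appended to the schema of the
    # model output during training/evaluation.
    calibration = schema.Scalar(
        (np.float32, (1, )), model.net.NextBlob('calibration'))
    model.add_metric_field('calibration', calibration)
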
111  @staticmethod
112  def _get_global_constant_initializer_op(
113  blob_name, array=None, dtype=None, initializer=None
114  ):
115  # to add a global constant to the model, one first needs to get the
116  # initializer
117  if array is not None:
118  assert initializer is None,\
119  "Only one of array and initializer should be specified"
120  if dtype is None:
121  array = np.array(array)
122  else:
123  array = np.array(array, dtype=dtype)
124 
125  # TODO: make GivenTensor generic
126  op_name = None
127  if array.dtype == np.int32:
128  op_name = 'GivenTensorIntFill'
129  elif array.dtype == np.int64:
130  op_name = 'GivenTensorInt64Fill'
131  elif array.dtype == np.str:
132  op_name = 'GivenTensorStringFill'
133  elif array.dtype == np.bool:
134  op_name = 'GivenTensorBoolFill'
135  else:
136  op_name = 'GivenTensorFill'
137 
138  def initializer(blob_name):
139  return core.CreateOperator(
140  op_name, [],
141  blob_name,
142  shape=array.shape,
143  values=array.flatten().tolist()
144  )
145  else:
146  assert initializer is not None
147  initializer_op = initializer(blob_name)
148  return initializer_op
149 
150  def add_global_constant(
151  self, name, array=None, dtype=None, initializer=None
152  ):
153  assert isinstance(name, six.string_types), (
154  'name should be a string as we are using it as map key')
155  # This is a global namescope for constants. They will be created in all
156  # init_nets and there should be very few of them.
157  assert name not in self.global_constants, \
158  "%s already added in global_constants" % name
159  blob_name = self.net.NextBlob(name)
160  self.global_constants[name] = blob_name
161  initializer_op = LayerModelHelper._get_global_constant_initializer_op(
162  blob_name, array, dtype, initializer
163  )
164  assert blob_name not in self.global_constant_initializers, \
165  "there is already an initializer op associated with blob %s" % \
166  blob_name
167  self.global_constant_initializers[blob_name] = initializer_op
168  return blob_name
169 
170  def maybe_add_global_constant(self, name, *args, **kwargs):
171  # Adds a new global constant ad hoc, without duplication:
172  # if the name was already registered in global_constants, it will not be
173  # added again, even if the intended value differs from its original value
174  if name in self.global_constants:
175  blob_name = self.global_constants[name]
176  initializer_op = \
177  LayerModelHelper._get_global_constant_initializer_op(
178  blob_name, *args, **kwargs
179  )
180  # check if the original initializer is the same as the one intended
181  # now
182  assert initializer_op == \
183  self.global_constant_initializers[blob_name], \
184  "conflict initializers for global constant %s, " \
185  "previous %s, now %s" % (
186  blob_name, str(initializer_op),
187  str(self.global_constant_initializers[blob_name]))
188  return blob_name
189  return self.add_global_constant(name, *args, **kwargs)
190 
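For illustration, a minimal sketch of adding and re-using a global constant (the constant name is invented; `model` is the instance from the construction sketch above):

    # Registered once; the initializer op is added to every init net this helper creates.
    pi_blob = model.add_global_constant('MY_PI', 3.14159)
    # Re-registering under the same name returns the existing blob, as long as
    # the intended initializer matches the original one.
    assert model.maybe_add_global_constant('MY_PI', 3.14159) == pi_blob
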
191  def _init_global_constants(self):
192  self.global_constants = {}
193  self.global_constant_initializers = {}
194  self.add_global_constant('ONE', 1.0)
195  self.add_global_constant('ZERO', 0.0)
196  self.add_global_constant('ZERO_RANGE', [0, 0], dtype='int32')
197 
198  def _add_global_constants(self, init_net):
199  for initializer_op in viewvalues(self.global_constant_initializers):
200  init_net._net.op.extend([initializer_op])
201 
202  def create_init_net(self, name):
203  init_net = core.Net(name)
204  self._add_global_constants(init_net)
205  return init_net
206 
207  def _validate_param_shape(self, param_name, shape):
208  if param_name not in self._param_to_shape:
209  return
210 
211  ref_shape = self._param_to_shape[param_name]
212 
213  if shape != ref_shape:
214  raise ValueError(
215  "Got inconsistent shapes between shared parameters "
216  "when trying to map a blob in scope {0} to {1}. ref_shape : "
217  " {2}, shape : {3}".format(
218  scope.CurrentNameScope(), param_name, ref_shape, shape)
219  )
220 
221  def create_param(self, param_name, shape, initializer, optimizer=None,
222  ps_param=None, regularizer=None):
223  if isinstance(param_name, core.BlobReference):
224  param_name = str(param_name)
225  elif isinstance(param_name, six.string_types):
226  # The parameter name will be equal to the current NameScope, resolved
227  # with respect to the parameter sharing of the scopes.
228  param_name = parameter_sharing_context.get_parameter_name(
229  param_name)
230  else:
231  raise ValueError("Unsupported type for param_name")
232 
233  param_blob = core.BlobReference(param_name)
234 
235  if len(initializer) == 1:
236  init_op_args = {}
237  else:
238  assert len(initializer) == 2
239  init_op_args = copy.deepcopy(initializer[1])
240  if shape is not None:
241  assert 'shape' not in init_op_args
242  init_op_args.update({'shape': shape})
243 
244  initializer_op = None
245  if self._initialize_params:
246  initializer_op = core.CreateOperator(
247  initializer[0],
248  [],
249  param_blob,
250  **init_op_args
251  )
252 
253  param = layers.LayerParameter(
254  parameter=param_blob,
255  initializer=initializer_op,
256  optimizer=optimizer,
257  ps_param=ps_param,
258  regularizer=regularizer
259  )
260 
261  self._validate_param_shape(param_name, shape)
262 
263  self._param_to_shape[param_name] = shape
264 
265  return param
266 
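A minimal sketch of the initializer format create_param expects, assuming the standard XavierFill op; the parameter name and shape are illustrative, and layers normally call create_param internally:

    fc_weight = model.create_param(
        param_name='sample_fc/w',
        shape=[16, 10],
        initializer=('XavierFill', {}),  # (init op name, init op kwargs)
    )
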
267  def next_layer_name(self, prefix):
268  base_name = core.ScopedName(prefix)
269  name = base_name
270  index = 0
271  while name in self._layer_names:
272  name = base_name + '_auto_' + str(index)
273  index += 1
274 
275  self._layer_names.add(name)
276  return name
277 
278  def add_layer(self, layer):
279  self._layers.append(layer)
280  for param in layer.get_parameters():
281  assert isinstance(param.parameter, core.BlobReference)
282 
283  self.param_to_optim[str(param.parameter)] = \
284  param.optimizer or self.default_optimizer
285 
286  self.params.append(param.parameter)
287  if isinstance(param, layers.LayerParameter):
288  self.param_to_reg[param.parameter] = param.regularizer
289  elif isinstance(param, ParameterInfo):
290  # TODO:
291  # Currently, LSTM and RNNcells, which use ModelHelper instead of
292  # LayerModelHelper as super class, are called in pooling_methods
293  # In ModelHelper, regularization is not supported in create_param
294  # We will unify the way of create_param of ModelHelper and
295  # LayerModelHelper in the future.
296  logger.info('regularization is unsupported for ParameterInfo object')
297  else:
298  raise ValueError(
299  'unknown object type besides ParameterInfo and LayerParameter: {}'
300  .format(param)
301  )
302 
303  # The primary value of adding everything to self.net is that the operators
304  # are generated right away, i.e. if an error happens it will be detected
305  # immediately. Other than this, create_x_net should be called.
306  layer.add_operators(self.net, self.param_init_net)
307  return layer.output_schema
308 
309  def get_parameter_blobs(self):
310  param_blobs = []
311  for layer in self._layers:
312  for param in layer.get_parameters():
313  param_blobs.append(param.parameter)
314 
315  return param_blobs
316 
317  @property
318  def seed(self):
319  return self._seed
320 
321  def store_seed(self, seed, sequence_seed=True):
322  # Store seed config that will be applied to each op in the net.
323  self._seed = seed
324  # If sequence_seed is True, the i-th op has rand_seed=`seed + i`
325  self._sequence_seed = sequence_seed
326 
327  def apply_seed(self, net):
328  if self._seed:
329  net.set_rand_seed(self._seed, self._sequence_seed)
330 
331  @property
332  def default_optimizer(self):
333  return self._default_optimizer
334 
335  @default_optimizer.setter
336  def default_optimizer(self, optimizer):
337  self._default_optimizer = optimizer
338 
339  @property
340  def input_feature_schema(self):
341  return self._input_feature_schema
342 
343  @property
344  def trainer_extra_schema(self):
345  return self._trainer_extra_schema
346 
347  @property
348  def metrics_schema(self):
349  """
350  Returns the schema that represents model output that should be used for
351  metric reporting.
352 
353  During the training/evaluation this schema will be appended to the
354  schema that represents model output.
355  """
356  return self._metrics_schema
357 
358  @property
359  def output_schema(self):
360  assert self._output_schema is not None
361  return self._output_schema
362 
363  @output_schema.setter
364  def output_schema(self, schema):
365  assert self._output_schema is None
366  self._output_schema = schema
367 
368  @property
369  def loss(self):
370  assert self._loss is not None
371  return self._loss
372 
373  @loss.setter
374  def loss(self, loss):
375  assert self._loss is None
376  self._loss = loss
377 
378  def has_loss(self):
379  return self._loss is not None
380 
381  def add_loss(self, loss, name='unnamed'):
382  assert loss is not None, "Added loss should not be None"
383  assert isinstance(loss, schema.Scalar) or isinstance(
384  loss, schema.Struct
385  ), "Added loss should be a scalar or a struct"
386  if self._loss is None:
387  self._loss = schema.Struct((name, loss))
388  else:
389  prefix_base = name + '_auto_'
390  index = 0
391  prefix = name
392  while prefix in self._loss:
393  prefix = prefix_base + str(index)
394  index += 1
395  loss_struct = schema.Struct((prefix, loss))
396  self._loss = self._loss + loss_struct
397 
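A hedged sketch of the collision handling above (the loss blobs are hypothetical): adding two losses under the same name keeps both, the second under an auto-suffixed field.

    loss_a = schema.Scalar((np.float32, tuple()), model.net.NextBlob('loss_a'))
    loss_b = schema.Scalar((np.float32, tuple()), model.net.NextBlob('loss_b'))
    model.add_loss(loss_a, name='total')
    model.add_loss(loss_b, name='total')  # stored as 'total_auto_0'
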
398  def add_trainer_extra_schema(self, trainer_extra_schema):
399  trainer_extra_record = schema.NewRecord(self.net, trainer_extra_schema)
400  self._trainer_extra_schema += trainer_extra_record
401 
402  def __getattr__(self, layer):
403  if layer.startswith('__'):
404  raise AttributeError(layer)
405 
406  # TODO(amalevich): Add support for ifbpy inline documentation
407  if layers.layer_exists(layer):
408  def wrapper(*args, **kwargs):
409  new_layer = layers.create_layer(layer, self, *args, **kwargs)
410  if kwargs.get("output_to_metrics", False):
411  new_layer.export_output_for_metrics()
412  if kwargs.get("params_to_metrics", False):
413  new_layer.export_params_for_metrics()
414  return self.add_layer(new_layer)
415  return wrapper
416  elif core.IsOperator(layer):
417  def wrapper(*args, **kwargs):
418  def apply_operator(net, in_record, out_record, **kwargs):
419  # TODO(amalevich): Switch to net.operator as soon as it gets
420  # landed
421  net.__getattr__(layer)(in_record.field_blobs(),
422  out_record.field_blobs(),
423  **kwargs)
424 
425  if 'name' not in kwargs:
426  kwargs['name'] = layer
427 
428  new_layer = layers.create_layer(
429  'Functional',
430  self, *args, function=apply_operator,
431  **kwargs
432  )
433 
434  if kwargs.get("output_to_metrics", False):
435  new_layer.export_output_for_metrics()
436  if kwargs.get("params_to_metrics", False):
437  new_layer.export_params_for_metrics()
438 
439  return self.add_layer(new_layer)
440  return wrapper
441  else:
442  raise ValueError(
443  "Trying to create non-registered layer: {}".format(layer))
444 
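To illustrate the dispatch above (layer names and sizes are made up): registered layers and plain Caffe2 operators can both be invoked as attributes of the model helper.

    # 'FC' is a registered layer, so this goes through layers.create_layer(...).
    fc_out = model.FC(model.input_feature_schema.dense, 16)
    # 'Relu' is a plain operator, so it is wrapped in a 'Functional' layer.
    relu_out = model.Relu(fc_out, 1)
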
445  @property
446  def layers(self):
447  return self._layers
448 
449  def apply_regularizers_on_loss(
450  self,
451  train_net,
452  train_init_net,
453  blob_to_device=None,
454  ):
455  for param, regularizer in viewitems(self.param_to_reg):
456  if regularizer is None or regularizer.apply_after_optimizer:
457  continue
458  assert isinstance(regularizer, Regularizer)
459  added_loss_blob = regularizer(train_net, train_init_net, param)
460  self.add_loss(
461  schema.Scalar(blob=added_loss_blob),
462  str(added_loss_blob)
463  )
464 
465  def apply_regularizers_after_optimizer(
466  self,
467  train_net,
468  train_init_net,
469  grad_map,
470  blob_to_device=None,
471  ):
472  for param, regularizer in viewitems(self.param_to_reg):
473  if regularizer is None or not regularizer.apply_after_optimizer:
474  continue
475  assert isinstance(regularizer, Regularizer)
476  regularizer(
477  train_net, train_init_net, param, grad_map.get(str(param)))
478 
479  def apply_optimizers(
480  self,
481  train_net,
482  train_init_net,
483  grad_map,
484  blob_to_device=None,
485  ):
486  CPU = core.DeviceOption(caffe2_pb2.CPU)
487  # if given, blob_to_device is a map from blob to device_option
488  blob_to_device = blob_to_device or {}
489  for param, optimizer in viewitems(self.param_to_optim):
490  assert optimizer is not None, \
491  "default optimizer must have been set in add_layer"
492  # note that not all params have a gradient and thus we send None if
493  # the gradient does not exist
494  device = get_param_device(
495  param,
496  grad_map.get(str(param)),
497  param_to_device=blob_to_device,
498  default_device=CPU,
499  )
500  with core.DeviceScope(device):
501  optimizer(
502  train_net, train_init_net, param, grad_map.get(str(param)))
503 
504  def _GetOne(self):
505  return self.global_constants['ONE']
506 
507  # An optimizer which allows us to do NO optimization
508  def NoOptim(self, *args, **kwargs):
509  pass
510 
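A short sketch of one way NoOptim is typically wired in (an assumption about usage, not prescribed by this file): setting it as the default optimizer leaves parameters untouched unless a layer supplies its own optimizer.

    model.default_optimizer = model.NoOptim
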
511  @property
512  def breakdown_map(self):
513  return self._breakdown_map
514 
515  @breakdown_map.setter
516  def breakdown_map(self, breakdown_map):
517  # TODO(xlwang): provide more rich feature information in breakdown_map;
518  # and change the assertion accordingly
519  assert isinstance(breakdown_map, dict)
520  assert all(isinstance(k, six.string_types) for k in breakdown_map)
521  assert sorted(list(breakdown_map.values())) == list(range(len(breakdown_map)))
522  self._breakdown_map = breakdown_map
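
A sketch of a map that satisfies the assertions above (feature names invented for illustration): string keys mapped onto the contiguous indices 0..N-1.

    model.breakdown_map = {'age': 0, 'country': 1, 'device': 2}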