# Caffe2 - Python API
# A deep learning, cross platform ML framework
# initializers.py
1 from __future__ import absolute_import
2 from __future__ import division
3 from __future__ import print_function
4 from __future__ import unicode_literals
5 
6 from caffe2.python.core import DataType, BlobReference, ScopedBlobReference
7 from caffe2.python.modeling.parameter_info import ParameterInfo
8 
9 import six
10 
11 
class Initializer(object):
    '''
    This class abstracts out parameter creation. One can come up with a new
    Initializer in order to implement more complex parameter initialization
    logic.
    '''

    def __init__(self, operator_name=None, **kwargs):
        # Name of the Caffe2 fill operator (e.g. "XavierFill") and its kwargs.
        # Both may be provided later through update().
        self.operator_name = operator_name
        self.operator_kwargs = kwargs

    def update(self, operator_name, kwargs):
        '''
        Set the fill operator name and its kwargs after construction.

        Raises:
            Exception: if an operator name was already set — overwriting a
                previously chosen initialization is deliberately forbidden.
        '''
        if self.operator_name is not None:
            raise Exception("Operator name overwrites are not allowed")
        self.operator_name = operator_name
        self.operator_kwargs = kwargs

    def create_param(self, param_name, init_net, shape):
        '''
        Emit the configured fill operator into init_net, producing a blob
        named param_name of the given shape, and wrap it in a ParameterInfo.
        '''
        # getattr() is the idiomatic spelling of the dynamic operator lookup
        # (equivalent to init_net.__getattr__(self.operator_name)).
        param = getattr(init_net, self.operator_name)(
            [], param_name, shape=shape, **self.operator_kwargs)
        return ParameterInfo(
            param_id=None,
            param=param,
            shape=shape,
        )
36 
37 
class ExternalInitializer(object):
    '''
    This class is used in cases when the parameter should not be initialized by
    the initializer, but rather provided in the workspace when param_init_net is
    executed.

    Current version is not doing any real sanity checks to the parameter.
    '''

    def create_param(self, param_name, init_net, shape):
        '''
        Wrap an externally provided blob named param_name in a ParameterInfo
        without emitting any initialization operator into init_net.

        Raises:
            TypeError: if param_name is neither a BlobReference nor a string.
        '''
        if isinstance(param_name, BlobReference):
            param = BlobReference(str(param_name), init_net)
        elif isinstance(param_name, six.string_types):
            param = ScopedBlobReference(param_name, init_net)
        else:
            # Fix: the original `raise "..."` raised a string, which is itself
            # a TypeError in Python 3 and loses the intended message.
            raise TypeError("Unsupported type for param_name")
        # TODO(amalevich): Add operator that will check param in the workspace
        return ParameterInfo(
            param_id=None,
            param=param,
            shape=shape,
        )
60 
class PseudoFP16Initializer(Initializer):
    '''
    Used in cases when the parameter should be used at half (16-bit) precision
    for compute purposes (i.e. on the forward and backward pass) but
    needs to be stored and optimized at single (32-bit) precision so tiny
    gradients with small learning rates don't underflow FP16 precision.
    A 32-bit copy of the 16-bit blob is stored in the ParameterInfo.
    This is helpful for mixed-precision training, see
    https://arxiv.org/abs/1710.03740 for details.
    '''

    def update(self, operator_name, kwargs):
        # Same overwrite protection as Initializer.update: the fill operator
        # may only be chosen once.
        if self.operator_name is not None:
            raise Exception("Operator name overwrites are not allowed")
        self.operator_name = operator_name
        self.operator_kwargs = kwargs

    def create_param(self, param_name, init_net, shape):
        '''
        Create the fp32 master blob "<param_name>_fp32" with the configured
        fill operator, cast it down to an fp16 blob named param_name, and
        return a ParameterInfo whose primary blob is the fp16 one, with the
        fp32 master recorded in blob_copy for the optimizer.
        '''
        # create master fp32 copy
        param_fp32 = getattr(init_net, self.operator_name)(
            [], param_name + "_fp32", shape=shape,
            **self.operator_kwargs)
        # cast to fp16 copy
        param = init_net.FloatToHalf(
            param_fp32, param_name)

        return ParameterInfo(
            param_id=None,
            param=param,
            shape=shape,
            blob_copy={DataType.FLOAT: param_fp32}
        )
93 
94 
class ReversePseudoFP16Initializer(Initializer):
    '''
    Like PseudoFP16Initializer above, except the primary blob is taken to
    be the 32-bit precision parameter, and the 16-bit version of the blob
    is stored in blob_copy instead.
    '''

    def update(self, operator_name, kwargs):
        # Same overwrite protection as Initializer.update: the fill operator
        # may only be chosen once.
        if self.operator_name is not None:
            raise Exception("Operator name overwrites are not allowed")
        self.operator_name = operator_name
        self.operator_kwargs = kwargs

    def create_param(self, param_name, init_net, shape):
        '''
        Create the fp32 master blob named param_name with the configured fill
        operator, cast it to an fp16 blob "<param_name>_fp16", and return a
        ParameterInfo whose primary blob is the fp32 one, with the fp16 copy
        recorded in blob_copy.
        '''
        # create master fp32 copy
        param_fp32 = getattr(init_net, self.operator_name)(
            [], param_name, shape=shape,
            **self.operator_kwargs)
        # cast to fp16 copy
        param_fp16 = init_net.FloatToHalf(
            param_fp32, param_name + "_fp16")

        return ParameterInfo(
            param_id=None,
            param=param_fp32,
            shape=shape,
            blob_copy={DataType.FLOAT16: param_fp16}
        )
122 
def update_initializer(initializer_class,
                       operator_name_and_kwargs,
                       default_operator_name_and_kwargs):
    '''
    A helper function to convert from operator_name_and_kwargs to a new
    object of type initializer_class. This function serves two purposes:

    1. Support for custom initialization operators being passed in
    2. Allow user to specify a custom Initializer without overwriting
       default operators used for initialization

    If initializer_class is None, creates a default initializer using
    the Initializer class and operator_name_and_kwargs provided

    If operator_name_and_kwargs is None, uses default_operator_name_and_kwargs

    returns an instantiated Initializer object
    '''
    # Resolve the (operator_name, kwargs) pair once, instead of re-evaluating
    # the fallback expression on every use as the original did.
    operator_name, operator_kwargs = (
        operator_name_and_kwargs or
        default_operator_name_and_kwargs
    )
    # Fall back to the plain Initializer when no custom class was given.
    klass = Initializer if initializer_class is None else initializer_class
    return klass(operator_name, **operator_kwargs)