Caffe2 - Python API
A deep learning, cross-platform ML framework
normalization.py
## @package normalization
# Module caffe2.python.helpers.normalization
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from caffe2.python import scope
from caffe2.python.modeling.parameter_info import ParameterTags
from caffe2.proto import caffe2_pb2
from caffe2.python.modeling import initializers


def lrn(model, blob_in, blob_out, order="NCHW", use_cudnn=False, **kwargs):
    """Local Response Normalization (LRN)."""
    dev = kwargs['device_option'] if 'device_option' in kwargs \
        else scope.CurrentDeviceScope()
    is_cpu = dev is None or dev.device_type == caffe2_pb2.CPU
    if use_cudnn and (not is_cpu):
        kwargs['engine'] = 'CUDNN'
        blobs_out = blob_out
    else:
        # The non-cuDNN LRN op also produces a scale blob, which the
        # gradient op consumes.
        blobs_out = [blob_out, "_" + blob_out + "_scale"]
    lrn = model.net.LRN(
        blob_in,
        blobs_out,
        order=order,
        **kwargs
    )

    if use_cudnn and (not is_cpu):
        return lrn
    else:
        return lrn[0]
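
# Example (illustrative sketch, not part of the original module): using the
# helper directly on a ModelHelper. The blob names and the LRN arguments
# below (`size`, `alpha`, `beta`, `bias`) are assumptions that follow the
# standard Caffe2 LRN operator schema; in practice this helper is usually
# reached via `brew.lrn(...)`.
#
#     from caffe2.python import model_helper
#     model = model_helper.ModelHelper(name="lrn_example")
#     out = lrn(model, "conv1", "conv1_lrn", order="NCHW",
#               size=5, alpha=1e-4, beta=0.75, bias=1.0)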


def softmax(model, blob_in, blob_out=None, use_cudnn=False, **kwargs):
    """Softmax."""
    if use_cudnn:
        kwargs['engine'] = 'CUDNN'
    if blob_out is not None:
        return model.net.Softmax(blob_in, blob_out, **kwargs)
    else:
        return model.net.Softmax(blob_in, **kwargs)
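
# Example (illustrative sketch): softmax over the class axis of a "pred"
# blob, with the cuDNN engine requested. Blob names are assumptions, and
# `model` is reused from the sketch above. When blob_out is omitted, Caffe2
# auto-names the output blob.
#
#     probs = softmax(model, "pred", "probs", use_cudnn=True)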


def instance_norm(model, blob_in, blob_out, dim_in, order="NCHW", **kwargs):
    blob_out = blob_out or model.net.NextName()
    # Input: input, scale, bias
    # Output: output, saved_mean, saved_inv_std
    # scale: initialize with ones
    # bias: initialize with zeros

    def init_blob(value, suffix):
        return model.param_init_net.ConstantFill(
            [], blob_out + "_" + suffix, shape=[dim_in], value=value)
    scale, bias = init_blob(1.0, "s"), init_blob(0.0, "b")

    model.AddParameter(scale, ParameterTags.WEIGHT)
    model.AddParameter(bias, ParameterTags.BIAS)
    blob_outs = [blob_out, blob_out + "_sm", blob_out + "_siv"]
    if 'is_test' in kwargs and kwargs['is_test']:
        # Test mode: only the normalized output is produced.
        blob_outputs = model.net.InstanceNorm(
            [blob_in, scale, bias], [blob_out],
            order=order, **kwargs)
        return blob_outputs
    else:
        # Training mode: also emit saved mean / inv-std for the gradient op.
        blob_outputs = model.net.InstanceNorm(
            [blob_in, scale, bias], blob_outs,
            order=order, **kwargs)
        # Return the output
        return blob_outputs[0]
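
# Example (illustrative sketch): instance norm over a 4D NCHW activation
# with 64 channels. `epsilon` is assumed to follow the Caffe2 InstanceNorm
# operator schema; in training mode the helper also emits the "<out>_sm" /
# "<out>_siv" blobs for the gradient op.
#
#     out = instance_norm(model, "conv2", "conv2_in", dim_in=64,
#                         order="NCHW", epsilon=1e-5)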


def spatial_bn(model, blob_in, blob_out, dim_in,
               init_scale=1., init_bias=0.,
               ScaleInitializer=None, BiasInitializer=None,
               RunningMeanInitializer=None, RunningVarianceInitializer=None,
               order="NCHW", **kwargs):
    blob_out = blob_out or model.net.NextName()
    # Input: input, scale, bias, est_mean, est_inv_var
    # Output: output, running_mean, running_inv_var, saved_mean,
    #         saved_inv_var
    # scale: initialize with init_scale (default 1.)
    # bias: initialize with init_bias (default 0.)
    # est mean: zero
    # est var: ones

    if model.init_params:
        scale_init = ("ConstantFill", {'value': init_scale})
        bias_init = ("ConstantFill", {'value': init_bias})
        rm_init = ("ConstantFill", {'value': 0.0})
        riv_init = ("ConstantFill", {'value': 1.0})

        ScaleInitializer = initializers.update_initializer(
            ScaleInitializer, scale_init, ("ConstantFill", {})
        )
        BiasInitializer = initializers.update_initializer(
            BiasInitializer, bias_init, ("ConstantFill", {})
        )
        RunningMeanInitializer = initializers.update_initializer(
            RunningMeanInitializer, rm_init, ("ConstantFill", {})
        )
        RunningVarianceInitializer = initializers.update_initializer(
            RunningVarianceInitializer, riv_init, ("ConstantFill", {})
        )
    else:
        ScaleInitializer = initializers.ExternalInitializer()
        BiasInitializer = initializers.ExternalInitializer()
        RunningMeanInitializer = initializers.ExternalInitializer()
        RunningVarianceInitializer = initializers.ExternalInitializer()

    scale = model.create_param(
        param_name=blob_out + '_s',
        shape=[dim_in],
        initializer=ScaleInitializer,
        tags=ParameterTags.WEIGHT
    )

    bias = model.create_param(
        param_name=blob_out + '_b',
        shape=[dim_in],
        initializer=BiasInitializer,
        tags=ParameterTags.BIAS
    )

    running_mean = model.create_param(
        param_name=blob_out + '_rm',
        shape=[dim_in],
        initializer=RunningMeanInitializer,
        tags=ParameterTags.COMPUTED_PARAM
    )

    running_inv_var = model.create_param(
        param_name=blob_out + '_riv',
        shape=[dim_in],
        initializer=RunningVarianceInitializer,
        tags=ParameterTags.COMPUTED_PARAM
    )

    blob_outs = [blob_out, running_mean, running_inv_var,
                 blob_out + "_sm", blob_out + "_siv"]
    if 'is_test' in kwargs and kwargs['is_test']:
        blob_outputs = model.net.SpatialBN(
            [blob_in, scale, bias, blob_outs[1], blob_outs[2]], [blob_out],
            order=order, **kwargs)
        return blob_outputs
    else:
        blob_outputs = model.net.SpatialBN(
            [blob_in, scale, bias, blob_outs[1], blob_outs[2]], blob_outs,
            order=order, **kwargs)
        # Return the output
        return blob_outputs[0]
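
# Example (illustrative sketch): batch norm at inference time. With
# `is_test=1` the op consumes the running statistics ("<out>_rm",
# "<out>_riv") and emits a single output; omit it during training so the
# running statistics are updated as well. `epsilon` and `momentum` follow
# the Caffe2 SpatialBN operator schema; the blob names are assumptions.
#
#     out = spatial_bn(model, "conv3", "conv3_bn", dim_in=128,
#                      epsilon=1e-5, momentum=0.9, is_test=1)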


def spatial_gn(model, blob_in, blob_out, dim_in,
               init_scale=1., init_bias=0.,
               ScaleInitializer=None, BiasInitializer=None,
               RunningMeanInitializer=None, RunningVarianceInitializer=None,
               order="NCHW", **kwargs):
    '''
    Group normalizes the input, cf. https://arxiv.org/abs/1803.08494.
    '''
    # NOTE: RunningMeanInitializer and RunningVarianceInitializer mirror
    # spatial_bn's signature but are unused here; GroupNorm keeps no
    # running statistics.

    blob_out = blob_out or model.net.NextName()
    # Input: input, scale, bias
    # Output: output, group_mean, group_inv_std
    # scale: initialize with init_scale (default 1.)
    # [recommendation: set init_scale = 0. in the last layer for each res block]
    # bias: initialize with init_bias (default 0.)

    if model.init_params:
        scale_init = ("ConstantFill", {'value': init_scale})
        bias_init = ("ConstantFill", {'value': init_bias})

        ScaleInitializer = initializers.update_initializer(
            ScaleInitializer, scale_init, ("ConstantFill", {})
        )
        BiasInitializer = initializers.update_initializer(
            BiasInitializer, bias_init, ("ConstantFill", {})
        )
    else:
        ScaleInitializer = initializers.ExternalInitializer()
        BiasInitializer = initializers.ExternalInitializer()

    scale = model.create_param(
        param_name=blob_out + '_s',
        shape=[dim_in],
        initializer=ScaleInitializer,
        tags=ParameterTags.WEIGHT
    )

    bias = model.create_param(
        param_name=blob_out + '_b',
        shape=[dim_in],
        initializer=BiasInitializer,
        tags=ParameterTags.BIAS
    )

    blob_outs = [blob_out,
                 blob_out + "_mean", blob_out + "_std"]

    blob_outputs = model.net.GroupNorm(
        [blob_in, scale, bias],
        blob_outs,
        **kwargs)
    # Return the output
    return blob_outputs[0]
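
# Example (illustrative sketch): group norm with the paper's default of 32
# groups. `group` and `epsilon` are forwarded to the GroupNorm operator via
# **kwargs and are assumed to match its schema; blob names are assumptions.
#
#     out = spatial_gn(model, "conv4", "conv4_gn", dim_in=256,
#                      group=32, epsilon=1e-5)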


def layer_norm(
    model,
    blob_in,
    blob_out,
    dim_in,
    axis=1,
    epsilon=1e-4,
    initial_scale=1.0,
    initial_bias=0.0,
):
    '''
    Layer normalizes the input, cf. https://arxiv.org/pdf/1607.06450.pdf.

    Args:
        blob_in: The input blob to layer normalize.
        blob_out: The layer normalized output blob.
        dim_in: The dimension of the scale and bias. For example, if blob_in is
            a 2D design matrix and axis is 1, this would be the number of
            columns.
        axis: (optional) The axis to normalize. Typically the feature axis.
            Defaults to 1.
        epsilon: (optional) A small value used for numerical stability in
            calculation. Defaults to 1e-4.
        initial_scale: (optional) The initial value for the learned scale
            parameter. Defaults to 1.0.
        initial_bias: (optional) The initial value for the learned additive
            bias parameter. Defaults to 0.0.

    Returns:
        A 3-tuple consisting of:
            - The layer normalized input blob.
            - The mean of the input blob across the given axis.
            - The standard deviation of the input blob across the given axis.
    '''

    # The LayerNorm operator only performs layerwise standardization
    # (z-scoring); scaling and shifting by the learned scale and bias
    # parameters is done separately below.
    normalized, mean, stdev = model.net.LayerNorm(
        [blob_in],
        [blob_out, blob_out + "_mean", blob_out + "_stdev"],
        axis=axis,
        epsilon=epsilon,
    )

    # The learned multiplicative scale or "gain".
    scale = model.create_param(
        param_name='{}_scale'.format(blob_out),
        shape=[dim_in],
        initializer=initializers.Initializer(
            'ConstantFill',
            value=initial_scale,
        ),
        tags=ParameterTags.WEIGHT,
    )

    # The learned additive bias or "shift".
    bias = model.create_param(
        param_name='{}_bias'.format(blob_out),
        shape=[dim_in],
        initializer=initializers.Initializer(
            'ConstantFill',
            value=initial_bias,
        ),
        tags=ParameterTags.BIAS,
    )

    # Apply the learned scale: broadcast multiply along the normalized axis.
    scaled = model.net.Mul(
        [normalized, scale],
        ['{}_scaled'.format(blob_out)],
        broadcast=1,
        axis=axis,
    )

    # Apply the learned bias: broadcast add along the normalized axis.
    biased = model.net.Add(
        [scaled, bias],
        ['{}_biased'.format(blob_out)],
        broadcast=1,
        axis=axis,
    )

    return biased, mean, stdev
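
# Example (illustrative sketch): layer norm over the feature axis of a 2D
# [batch, 256] activation; the helper returns the scaled-and-shifted output
# plus the per-example mean and standard deviation. Blob names are
# assumptions.
#
#     normed, mean, stdev = layer_norm(
#         model, "fc1", "fc1_ln", dim_in=256, axis=1, epsilon=1e-4)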