 Caffe2 - Python API A deep learning, cross platform ML framework
normalization.py
1 import torch
2 import numbers
3 from torch.nn.parameter import Parameter
4 from .module import Module
5 from .batchnorm import _BatchNorm
6 from .. import functional as F
7 from .. import init
8 from ..._jit_internal import weak_module, weak_script_method
9
10
@weak_module
class LocalResponseNorm(Module):
    r"""Applies local response normalization over an input signal composed
    of several input planes, where channels occupy the second dimension.
    The normalization is applied across channels.

    .. math::
        b_{c} = a_{c}\left(k + \frac{\alpha}{n}
        \sum_{c'=\max(0, c-n/2)}^{\min(N-1,c+n/2)}a_{c'}^2\right)^{-\beta}

    Args:
        size: amount of neighbouring channels used for normalization
        alpha: multiplicative factor. Default: 0.0001
        beta: exponent. Default: 0.75
        k: additive factor. Default: 1

    Shape:
        - Input: :math:`(N, C, *)`
        - Output: :math:`(N, C, *)` (same shape as input)

    Examples::

        >>> lrn = nn.LocalResponseNorm(2)
        >>> signal_2d = torch.randn(32, 5, 24, 24)
        >>> signal_4d = torch.randn(16, 5, 7, 7, 7, 7)
        >>> output_2d = lrn(signal_2d)
        >>> output_4d = lrn(signal_4d)

    """
    __constants__ = ['size', 'alpha', 'beta', 'k']

    def __init__(self, size, alpha=1e-4, beta=0.75, k=1.):
        super(LocalResponseNorm, self).__init__()
        # Hyper-parameters are stored unchanged and handed to the functional
        # implementation on every forward call.
        self.size = size
        self.alpha = alpha
        self.beta = beta
        self.k = k

    @weak_script_method
    def forward(self, input):
        # Delegate to the functional form with the stored hyper-parameters.
        return F.local_response_norm(
            input, self.size, self.alpha, self.beta, self.k)

    def extra_repr(self):
        # Summary of the hyper-parameters for the module's printed form.
        template = '{size}, alpha={alpha}, beta={beta}, k={k}'
        return template.format(
            size=self.size, alpha=self.alpha, beta=self.beta, k=self.k)
56
57
class CrossMapLRN2d(Module):
    """Module wrapper around the backend ``CrossMapLRN2d`` function.

    The constructor stores ``size``, ``alpha``, ``beta`` and ``k`` unchanged;
    ``forward`` passes them to ``self._backend.CrossMapLRN2d`` and applies the
    resulting callable to the input.

    Args:
        size: forwarded as the first argument of the backend function
        alpha: forwarded to the backend function. Default: 0.0001
        beta: forwarded to the backend function. Default: 0.75
        k: forwarded to the backend function. Default: 1
    """

    def __init__(self, size, alpha=1e-4, beta=0.75, k=1):
        super(CrossMapLRN2d, self).__init__()
        self.size = size
        self.alpha = alpha
        self.beta = beta
        self.k = k

    def forward(self, input):
        # Build the backend callable from the stored settings, then apply it.
        backend_fn = self._backend.CrossMapLRN2d(
            self.size, self.alpha, self.beta, self.k)
        return backend_fn(input)

    def extra_repr(self):
        # Summary of the hyper-parameters for the module's printed form.
        template = '{size}, alpha={alpha}, beta={beta}, k={k}'
        return template.format(
            size=self.size, alpha=self.alpha, beta=self.beta, k=self.k)
73
74
@weak_module
class LayerNorm(Module):
    r"""Applies Layer Normalization over a mini-batch of inputs as described in
    the paper `Layer Normalization`_ .

    .. math::
        y = \frac{x - \mathrm{E}[x]}{ \sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta

    The mean and standard-deviation are calculated separately over the last
    certain number dimensions which have to be of the shape specified by
    :attr:`normalized_shape`.
    :math:`\gamma` and :math:`\beta` are learnable affine transform parameters of
    :attr:`normalized_shape` if :attr:`elementwise_affine` is ``True``.

    .. note::
        Unlike Batch Normalization and Instance Normalization, which applies
        scalar scale and bias for each entire channel/plane with the
        :attr:`affine` option, Layer Normalization applies per-element scale and
        bias with :attr:`elementwise_affine`.

    This layer uses statistics computed from input data in both training and
    evaluation modes.

    Args:
        normalized_shape (int or list or torch.Size): input shape from an expected input
            of size

            .. math::
                [* \times \text{normalized\_shape}[0] \times \text{normalized\_shape}[1]
                    \times \ldots \times \text{normalized\_shape}[-1]]

            If a single integer is used, it is treated as a singleton list, and this module will
            normalize over the last dimension which is expected to be of that specific size.
        eps: a value added to the denominator for numerical stability. Default: 1e-5
        elementwise_affine: a boolean value that when set to ``True``, this module
            has learnable per-element affine parameters initialized to ones (for weights)
            and zeros (for biases). Default: ``True``.

    Shape:
        - Input: :math:`(N, *)`
        - Output: :math:`(N, *)` (same shape as input)

    Examples::

        >>> input = torch.randn(20, 5, 10, 10)
        >>> # With Learnable Parameters
        >>> m = nn.LayerNorm(input.size()[1:])
        >>> # Without Learnable Parameters
        >>> m = nn.LayerNorm(input.size()[1:], elementwise_affine=False)
        >>> # Normalize over last two dimensions
        >>> m = nn.LayerNorm([10, 10])
        >>> # Normalize over last dimension of size 10
        >>> m = nn.LayerNorm(10)
        >>> # Activating the module
        >>> output = m(input)

    .. _`Layer Normalization`: https://arxiv.org/abs/1607.06450
    """
    __constants__ = ['normalized_shape', 'weight', 'bias', 'eps']

    def __init__(self, normalized_shape, eps=1e-5, elementwise_affine=True):
        super(LayerNorm, self).__init__()
        # A bare integer is promoted to a one-element shape.
        if isinstance(normalized_shape, numbers.Integral):
            normalized_shape = (normalized_shape,)
        self.normalized_shape = torch.Size(normalized_shape)
        self.eps = eps
        self.elementwise_affine = elementwise_affine
        if not self.elementwise_affine:
            # Register the names as absent parameters so the attributes
            # still exist (as None) when no affine transform is used.
            self.register_parameter('weight', None)
            self.register_parameter('bias', None)
        else:
            # Allocated here; the values are set by reset_parameters().
            self.weight = Parameter(torch.Tensor(*normalized_shape))
            self.bias = Parameter(torch.Tensor(*normalized_shape))
        self.reset_parameters()

    def reset_parameters(self):
        # Nothing to initialise when the affine parameters are disabled.
        if not self.elementwise_affine:
            return
        init.ones_(self.weight)
        init.zeros_(self.bias)

    @weak_script_method
    def forward(self, input):
        # Delegate to the functional form with the stored configuration.
        return F.layer_norm(
            input, self.normalized_shape, self.weight, self.bias, self.eps)

    def extra_repr(self):
        # Summary of the configuration for the module's printed form.
        template = ('{normalized_shape}, eps={eps}, '
                    'elementwise_affine={elementwise_affine}')
        return template.format(
            normalized_shape=self.normalized_shape,
            eps=self.eps,
            elementwise_affine=self.elementwise_affine)
163
164
@weak_module
class GroupNorm(Module):
    r"""Applies Group Normalization over a mini-batch of inputs as described in
    the paper `Group Normalization`_ .

    .. math::
        y = \frac{x - \mathrm{E}[x]}{ \sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta

    The input channels are separated into :attr:`num_groups` groups, each containing
    ``num_channels / num_groups`` channels. The mean and standard-deviation are calculated
    separately over each group. :math:`\gamma` and :math:`\beta` are learnable
    per-channel affine transform parameter vectors of size :attr:`num_channels` if
    :attr:`affine` is ``True``.

    This layer uses statistics computed from input data in both training and
    evaluation modes.

    Args:
        num_groups (int): number of groups to separate the channels into
        num_channels (int): number of channels expected in input; must be
            divisible by :attr:`num_groups`
        eps: a value added to the denominator for numerical stability. Default: 1e-5
        affine: a boolean value that when set to ``True``, this module
            has learnable per-channel affine parameters initialized to ones (for weights)
            and zeros (for biases). Default: ``True``.

    Raises:
        ValueError: if :attr:`num_groups` is not positive, or if
            :attr:`num_channels` is not divisible by :attr:`num_groups`.

    Shape:
        - Input: :math:`(N, C, *)` where :math:`C=\text{num\_channels}`
        - Output: :math:`(N, C, *)` (same shape as input)

    Examples::

        >>> input = torch.randn(20, 6, 10, 10)
        >>> # Separate 6 channels into 3 groups
        >>> m = nn.GroupNorm(3, 6)
        >>> # Separate 6 channels into 6 groups (equivalent with InstanceNorm)
        >>> m = nn.GroupNorm(6, 6)
        >>> # Put all 6 channels into a single group (equivalent with LayerNorm)
        >>> m = nn.GroupNorm(1, 6)
        >>> # Activating the module
        >>> output = m(input)

    .. _`Group Normalization`: https://arxiv.org/abs/1803.08494
    """
    __constants__ = ['num_groups', 'num_channels', 'eps', 'affine', 'weight',
                     'bias']

    def __init__(self, num_groups, num_channels, eps=1e-5, affine=True):
        super(GroupNorm, self).__init__()
        # Reject an impossible grouping at construction time instead of
        # deferring the failure to the first forward call.
        if num_groups < 1:
            raise ValueError('num_groups must be a positive integer')
        if num_channels % num_groups != 0:
            raise ValueError('num_channels must be divisible by num_groups')
        self.num_groups = num_groups
        self.num_channels = num_channels
        self.eps = eps
        self.affine = affine
        if self.affine:
            # Allocated here; the values are set by reset_parameters().
            self.weight = Parameter(torch.Tensor(num_channels))
            self.bias = Parameter(torch.Tensor(num_channels))
        else:
            # Register the names as absent parameters so the attributes
            # still exist (as None) when no affine transform is used.
            self.register_parameter('weight', None)
            self.register_parameter('bias', None)
        self.reset_parameters()

    def reset_parameters(self):
        # Only the affine variant has parameters to initialise.
        if self.affine:
            init.ones_(self.weight)
            init.zeros_(self.bias)

    @weak_script_method
    def forward(self, input):
        # Delegate to the functional form with the stored configuration.
        return F.group_norm(
            input, self.num_groups, self.weight, self.bias, self.eps)

    def extra_repr(self):
        # Summary of the configuration for the module's printed form.
        return '{num_groups}, {num_channels}, eps={eps}, ' \
            'affine={affine}'.format(**self.__dict__)
238
239
240 # TODO: ContrastiveNorm2d
241 # TODO: DivisiveNorm2d
242 # TODO: SubtractiveNorm2d