PyTorch - Python API
A deep learning, cross platform ML framework
conv.py
1 # coding=utf-8
2 import math
3 import torch
4 from torch.nn.parameter import Parameter
5 from .. import functional as F
6 from .. import init
7 from .module import Module
8 from .utils import _single, _pair, _triple
9 from ..._jit_internal import weak_module, weak_script_method, List
10 
11 
@weak_module
class _ConvNd(Module):
    """Base class for N-dimensional convolution modules.

    Holds the parameter creation, initialization, and ``repr`` logic shared
    by ``Conv1d``/``Conv2d``/``Conv3d`` and their transposed counterparts.
    Subclasses are expected to pass hyper-parameters already normalized to
    tuples (via ``_single``/``_pair``/``_triple``).

    Raises:
        ValueError: if ``in_channels`` or ``out_channels`` is not divisible
            by ``groups``.
    """

    __constants__ = ['stride', 'padding', 'dilation', 'groups', 'bias', 'padding_mode']

    def __init__(self, in_channels, out_channels, kernel_size, stride,
                 padding, dilation, transposed, output_padding,
                 groups, bias, padding_mode):
        super(_ConvNd, self).__init__()
        if in_channels % groups != 0:
            raise ValueError('in_channels must be divisible by groups')
        if out_channels % groups != 0:
            raise ValueError('out_channels must be divisible by groups')
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.dilation = dilation
        self.transposed = transposed
        self.output_padding = output_padding
        self.groups = groups
        self.padding_mode = padding_mode
        # Transposed convolutions swap the channel axes of the weight:
        # (in, out // groups, *kernel) instead of (out, in // groups, *kernel).
        if transposed:
            self.weight = Parameter(torch.Tensor(
                in_channels, out_channels // groups, *kernel_size))
        else:
            self.weight = Parameter(torch.Tensor(
                out_channels, in_channels // groups, *kernel_size))
        if bias:
            self.bias = Parameter(torch.Tensor(out_channels))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()

    def reset_parameters(self):
        """Initialize weight and bias in place.

        Kaiming-uniform with ``a=sqrt(5)`` is equivalent to
        U(-1/sqrt(fan_in), 1/sqrt(fan_in)) for the weight; the bias uses the
        same bound computed from the weight's fan-in.
        """
        # NOTE(review): removed a dead local (`n = self.in_channels`) that was
        # computed but never used.
        init.kaiming_uniform_(self.weight, a=math.sqrt(5))
        if self.bias is not None:
            fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
            bound = 1 / math.sqrt(fan_in)
            init.uniform_(self.bias, -bound, bound)

    def extra_repr(self):
        # Only non-default hyper-parameters are shown, to keep the repr short.
        s = ('{in_channels}, {out_channels}, kernel_size={kernel_size}'
             ', stride={stride}')
        if self.padding != (0,) * len(self.padding):
            s += ', padding={padding}'
        if self.dilation != (1,) * len(self.dilation):
            s += ', dilation={dilation}'
        if self.output_padding != (0,) * len(self.output_padding):
            s += ', output_padding={output_padding}'
        if self.groups != 1:
            s += ', groups={groups}'
        if self.bias is None:
            s += ', bias=False'
        return s.format(**self.__dict__)
69 
70 
@weak_module
class Conv1d(_ConvNd):
    r"""Applies a 1D convolution over an input signal composed of several
    input planes.

    For an input of size :math:`(N, C_{\text{in}}, L)` the output
    :math:`(N, C_{\text{out}}, L_{\text{out}})` is computed as

    .. math::
        \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) +
        \sum_{k = 0}^{C_{in} - 1} \text{weight}(C_{\text{out}_j}, k)
        \star \text{input}(N_i, k)

    where :math:`\star` is the valid `cross-correlation`_ operator,
    :math:`N` is a batch size, :math:`C` denotes a number of channels, and
    :math:`L` is the length of the signal sequence.

    :attr:`stride`, :attr:`padding` and :attr:`dilation` control the
    cross-correlation stride, the implicit zero-padding on both sides, and
    the spacing between kernel points (the à trous algorithm; see the
    `link`_ for a visualization). :attr:`groups` controls blocked
    connections between input and output channels; both
    :attr:`in_channels` and :attr:`out_channels` must be divisible by it.
    With ``groups == in_channels`` and ``out_channels == K * in_channels``
    this is a depthwise convolution with multiplier `K`.

    .. note::

        Depending of the size of your kernel, several (of the last)
        columns of the input might be lost, because it is a valid
        `cross-correlation`_, and not a full `cross-correlation`_.
        It is up to the user to add proper padding.

    .. include:: cudnn_deterministic.rst

    Args:
        in_channels (int): Number of channels in the input image
        out_channels (int): Number of channels produced by the convolution
        kernel_size (int or tuple): Size of the convolving kernel
        stride (int or tuple, optional): Stride of the convolution. Default: 1
        padding (int or tuple, optional): Zero-padding added to both sides of
            the input. Default: 0
        dilation (int or tuple, optional): Spacing between kernel
            elements. Default: 1
        groups (int, optional): Number of blocked connections from input
            channels to output channels. Default: 1
        bias (bool, optional): If ``True``, adds a learnable bias to the
            output. Default: ``True``
        padding_mode (string, optional): Accepted values ``zeros`` and
            ``circular``. Default: ``zeros``

    Shape:
        - Input: :math:`(N, C_{in}, L_{in})`
        - Output: :math:`(N, C_{out}, L_{out})` where

          .. math::
              L_{out} = \left\lfloor\frac{L_{in} + 2 \times \text{padding} - \text{dilation}
                        \times (\text{kernel\_size} - 1) - 1}{\text{stride}} + 1\right\rfloor

    Attributes:
        weight (Tensor): the learnable weights of the module of shape
            :math:`(\text{out\_channels}, \frac{\text{in\_channels}}{\text{groups}}, \text{kernel\_size})`,
            sampled from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
            :math:`k = \frac{1}{C_\text{in} * \text{kernel\_size}}`
        bias (Tensor): the learnable bias of the module of shape
            (out_channels), sampled from the same distribution as ``weight``
            when :attr:`bias` is ``True``

    Examples::

        >>> m = nn.Conv1d(16, 33, 3, stride=2)
        >>> input = torch.randn(20, 16, 50)
        >>> output = m(input)

    .. _cross-correlation:
        https://en.wikipedia.org/wiki/Cross-correlation

    .. _link:
        https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1,
                 bias=True, padding_mode='zeros'):
        # Normalize every spatial hyper-parameter to a 1-tuple before
        # delegating to the shared _ConvNd constructor.
        super(Conv1d, self).__init__(
            in_channels, out_channels, _single(kernel_size), _single(stride),
            _single(padding), _single(dilation),
            False, _single(0), groups, bias, padding_mode)

    @weak_script_method
    def forward(self, input):
        if self.padding_mode != 'circular':
            return F.conv1d(input, self.weight, self.bias, self.stride,
                            self.padding, self.dilation, self.groups)
        # Circular mode: pad explicitly with F.pad, then convolve with no
        # implicit padding.
        pad = self.padding[0]
        padded = F.pad(input, ((pad + 1) // 2, pad // 2), mode='circular')
        return F.conv1d(padded, self.weight, self.bias, self.stride,
                        _single(0), self.dilation, self.groups)
198 
199 
200 @weak_module
202  r"""Applies a 2D convolution over an input signal composed of several input
203  planes.
204 
205  In the simplest case, the output value of the layer with input size
206  :math:`(N, C_{\text{in}}, H, W)` and output :math:`(N, C_{\text{out}}, H_{\text{out}}, W_{\text{out}})`
207  can be precisely described as:
208 
209  .. math::
210  \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) +
211  \sum_{k = 0}^{C_{\text{in}} - 1} \text{weight}(C_{\text{out}_j}, k) \star \text{input}(N_i, k)
212 
213 
214  where :math:`\star` is the valid 2D `cross-correlation`_ operator,
215  :math:`N` is a batch size, :math:`C` denotes a number of channels,
216  :math:`H` is a height of input planes in pixels, and :math:`W` is
217  width in pixels.
218 
219  * :attr:`stride` controls the stride for the cross-correlation, a single
220  number or a tuple.
221 
222  * :attr:`padding` controls the amount of implicit zero-paddings on both
223  sides for :attr:`padding` number of points for each dimension.
224 
225  * :attr:`dilation` controls the spacing between the kernel points; also
226  known as the à trous algorithm. It is harder to describe, but this `link`_
227  has a nice visualization of what :attr:`dilation` does.
228 
229  * :attr:`groups` controls the connections between inputs and outputs.
230  :attr:`in_channels` and :attr:`out_channels` must both be divisible by
231  :attr:`groups`. For example,
232 
233  * At groups=1, all inputs are convolved to all outputs.
234  * At groups=2, the operation becomes equivalent to having two conv
235  layers side by side, each seeing half the input channels,
236  and producing half the output channels, and both subsequently
237  concatenated.
238  * At groups= :attr:`in_channels`, each input channel is convolved with
239  its own set of filters, of size:
240  :math:`\left\lfloor\frac{out\_channels}{in\_channels}\right\rfloor`.
241 
242  The parameters :attr:`kernel_size`, :attr:`stride`, :attr:`padding`, :attr:`dilation` can either be:
243 
244  - a single ``int`` -- in which case the same value is used for the height and width dimension
245  - a ``tuple`` of two ints -- in which case, the first `int` is used for the height dimension,
246  and the second `int` for the width dimension
247 
248  .. note::
249 
250  Depending of the size of your kernel, several (of the last)
251  columns of the input might be lost, because it is a valid `cross-correlation`_,
252  and not a full `cross-correlation`_.
253  It is up to the user to add proper padding.
254 
255  .. note::
256 
257  When `groups == in_channels` and `out_channels == K * in_channels`,
258  where `K` is a positive integer, this operation is also termed in
259  literature as depthwise convolution.
260 
261  In other words, for an input of size :math:`(N, C_{in}, H_{in}, W_{in})`,
262  a depthwise convolution with a depthwise multiplier `K`, can be constructed by arguments
263  :math:`(in\_channels=C_{in}, out\_channels=C_{in} \times K, ..., groups=C_{in})`.
264 
265  .. include:: cudnn_deterministic.rst
266 
267  Args:
268  in_channels (int): Number of channels in the input image
269  out_channels (int): Number of channels produced by the convolution
270  kernel_size (int or tuple): Size of the convolving kernel
271  stride (int or tuple, optional): Stride of the convolution. Default: 1
272  padding (int or tuple, optional): Zero-padding added to both sides of the input. Default: 0
 273  padding_mode (string, optional): Accepted values ``zeros`` and ``circular``. Default: ``zeros``
274  dilation (int or tuple, optional): Spacing between kernel elements. Default: 1
275  groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1
276  bias (bool, optional): If ``True``, adds a learnable bias to the output. Default: ``True``
277 
278  Shape:
279  - Input: :math:`(N, C_{in}, H_{in}, W_{in})`
280  - Output: :math:`(N, C_{out}, H_{out}, W_{out})` where
281 
282  .. math::
283  H_{out} = \left\lfloor\frac{H_{in} + 2 \times \text{padding}[0] - \text{dilation}[0]
284  \times (\text{kernel\_size}[0] - 1) - 1}{\text{stride}[0]} + 1\right\rfloor
285 
286  .. math::
287  W_{out} = \left\lfloor\frac{W_{in} + 2 \times \text{padding}[1] - \text{dilation}[1]
288  \times (\text{kernel\_size}[1] - 1) - 1}{\text{stride}[1]} + 1\right\rfloor
289 
290  Attributes:
291  weight (Tensor): the learnable weights of the module of shape
292  :math:`(\text{out\_channels}, \frac{\text{in\_channels}}{\text{groups}},
293  \text{kernel\_size[0]}, \text{kernel\_size[1]})`.
294  The values of these weights are sampled from
295  :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
296  :math:`k = \frac{1}{C_\text{in} * \prod_{i=0}^{1}\text{kernel\_size}[i]}`
297  bias (Tensor): the learnable bias of the module of shape (out_channels). If :attr:`bias` is ``True``,
298  then the values of these weights are
299  sampled from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
300  :math:`k = \frac{1}{C_\text{in} * \prod_{i=0}^{1}\text{kernel\_size}[i]}`
301 
302  Examples::
303 
304  >>> # With square kernels and equal stride
305  >>> m = nn.Conv2d(16, 33, 3, stride=2)
306  >>> # non-square kernels and unequal stride and with padding
307  >>> m = nn.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2))
308  >>> # non-square kernels and unequal stride and with padding and dilation
309  >>> m = nn.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2), dilation=(3, 1))
310  >>> input = torch.randn(20, 16, 50, 100)
311  >>> output = m(input)
312 
313  .. _cross-correlation:
314  https://en.wikipedia.org/wiki/Cross-correlation
315 
316  .. _link:
317  https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md
318  """
319  def __init__(self, in_channels, out_channels, kernel_size, stride=1,
320  padding=0, dilation=1, groups=1,
321  bias=True, padding_mode='zeros'):
322  kernel_size = _pair(kernel_size)
323  stride = _pair(stride)
324  padding = _pair(padding)
325  dilation = _pair(dilation)
326  super(Conv2d, self).__init__(
327  in_channels, out_channels, kernel_size, stride, padding, dilation,
328  False, _pair(0), groups, bias, padding_mode)
329 
330  @weak_script_method
331  def forward(self, input):
332  if self.padding_mode == 'circular':
333  expanded_padding = ((self.padding[1] + 1) // 2, self.padding[1] // 2,
334  (self.padding[0] + 1) // 2, self.padding[0] // 2)
335  return F.conv2d(F.pad(input, expanded_padding, mode='circular'),
336  self.weight, self.bias, self.stride,
337  _pair(0), self.dilation, self.groups)
338  return F.conv2d(input, self.weight, self.bias, self.stride,
339  self.padding, self.dilation, self.groups)
340 
341 
342 @weak_module
344  r"""Applies a 3D convolution over an input signal composed of several input
345  planes.
346 
347  In the simplest case, the output value of the layer with input size :math:`(N, C_{in}, D, H, W)`
348  and output :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})` can be precisely described as:
349 
350  .. math::
351  out(N_i, C_{out_j}) = bias(C_{out_j}) +
352  \sum_{k = 0}^{C_{in} - 1} weight(C_{out_j}, k) \star input(N_i, k)
353 
354  where :math:`\star` is the valid 3D `cross-correlation`_ operator
355 
356  * :attr:`stride` controls the stride for the cross-correlation.
357 
358  * :attr:`padding` controls the amount of implicit zero-paddings on both
359  sides for :attr:`padding` number of points for each dimension.
360 
361  * :attr:`dilation` controls the spacing between the kernel points; also known as the à trous algorithm.
362  It is harder to describe, but this `link`_ has a nice visualization of what :attr:`dilation` does.
363 
364  * :attr:`groups` controls the connections between inputs and outputs.
365  :attr:`in_channels` and :attr:`out_channels` must both be divisible by
366  :attr:`groups`. For example,
367 
368  * At groups=1, all inputs are convolved to all outputs.
369  * At groups=2, the operation becomes equivalent to having two conv
370  layers side by side, each seeing half the input channels,
371  and producing half the output channels, and both subsequently
372  concatenated.
373  * At groups= :attr:`in_channels`, each input channel is convolved with
374  its own set of filters, of size
375  :math:`\left\lfloor\frac{out\_channels}{in\_channels}\right\rfloor`.
376 
377  The parameters :attr:`kernel_size`, :attr:`stride`, :attr:`padding`, :attr:`dilation` can either be:
378 
379  - a single ``int`` -- in which case the same value is used for the depth, height and width dimension
380  - a ``tuple`` of three ints -- in which case, the first `int` is used for the depth dimension,
381  the second `int` for the height dimension and the third `int` for the width dimension
382 
383  .. note::
384 
385  Depending of the size of your kernel, several (of the last)
386  columns of the input might be lost, because it is a valid `cross-correlation`_,
387  and not a full `cross-correlation`_.
388  It is up to the user to add proper padding.
389 
390  .. note::
391 
392  When `groups == in_channels` and `out_channels == K * in_channels`,
393  where `K` is a positive integer, this operation is also termed in
394  literature as depthwise convolution.
395 
396  In other words, for an input of size :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`,
397  a depthwise convolution with a depthwise multiplier `K`, can be constructed by arguments
398  :math:`(in\_channels=C_{in}, out\_channels=C_{in} \times K, ..., groups=C_{in})`.
399 
400  .. include:: cudnn_deterministic.rst
401 
402  Args:
403  in_channels (int): Number of channels in the input image
404  out_channels (int): Number of channels produced by the convolution
405  kernel_size (int or tuple): Size of the convolving kernel
406  stride (int or tuple, optional): Stride of the convolution. Default: 1
407  padding (int or tuple, optional): Zero-padding added to all three sides of the input. Default: 0
 408  padding_mode (string, optional): Accepted values ``zeros`` and ``circular``. Default: ``zeros``
409  dilation (int or tuple, optional): Spacing between kernel elements. Default: 1
410  groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1
411  bias (bool, optional): If ``True``, adds a learnable bias to the output. Default: ``True``
412 
413  Shape:
414  - Input: :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`
415  - Output: :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})` where
416 
417  .. math::
418  D_{out} = \left\lfloor\frac{D_{in} + 2 \times \text{padding}[0] - \text{dilation}[0]
419  \times (\text{kernel\_size}[0] - 1) - 1}{\text{stride}[0]} + 1\right\rfloor
420 
421  .. math::
422  H_{out} = \left\lfloor\frac{H_{in} + 2 \times \text{padding}[1] - \text{dilation}[1]
423  \times (\text{kernel\_size}[1] - 1) - 1}{\text{stride}[1]} + 1\right\rfloor
424 
425  .. math::
426  W_{out} = \left\lfloor\frac{W_{in} + 2 \times \text{padding}[2] - \text{dilation}[2]
427  \times (\text{kernel\_size}[2] - 1) - 1}{\text{stride}[2]} + 1\right\rfloor
428 
429  Attributes:
430  weight (Tensor): the learnable weights of the module of shape
431  :math:`(\text{out\_channels}, \frac{\text{in\_channels}}{\text{groups}},
432  \text{kernel\_size[0]}, \text{kernel\_size[1]}, \text{kernel\_size[2]})`.
433  The values of these weights are sampled from
434  :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
435  :math:`k = \frac{1}{C_\text{in} * \prod_{i=0}^{2}\text{kernel\_size}[i]}`
436  bias (Tensor): the learnable bias of the module of shape (out_channels). If :attr:`bias` is ``True``,
437  then the values of these weights are
438  sampled from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
439  :math:`k = \frac{1}{C_\text{in} * \prod_{i=0}^{2}\text{kernel\_size}[i]}`
440 
441  Examples::
442 
443  >>> # With square kernels and equal stride
444  >>> m = nn.Conv3d(16, 33, 3, stride=2)
445  >>> # non-square kernels and unequal stride and with padding
446  >>> m = nn.Conv3d(16, 33, (3, 5, 2), stride=(2, 1, 1), padding=(4, 2, 0))
447  >>> input = torch.randn(20, 16, 10, 50, 100)
448  >>> output = m(input)
449 
450  .. _cross-correlation:
451  https://en.wikipedia.org/wiki/Cross-correlation
452 
453  .. _link:
454  https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md
455  """
456  def __init__(self, in_channels, out_channels, kernel_size, stride=1,
457  padding=0, dilation=1, groups=1,
458  bias=True, padding_mode='zeros'):
459  kernel_size = _triple(kernel_size)
460  stride = _triple(stride)
461  padding = _triple(padding)
462  dilation = _triple(dilation)
463  super(Conv3d, self).__init__(
464  in_channels, out_channels, kernel_size, stride, padding, dilation,
465  False, _triple(0), groups, bias, padding_mode)
466 
467  @weak_script_method
468  def forward(self, input):
469  if self.padding_mode == 'circular':
470  expanded_padding = ((self.padding[2] + 1) // 2, self.padding[2] // 2,
471  (self.padding[1] + 1) // 2, self.padding[1] // 2,
472  (self.padding[0] + 1) // 2, self.padding[0] // 2)
473  return F.conv3d(F.pad(input, expanded_padding, mode='circular'),
474  self.weight, self.bias, self.stride, _triple(0),
475  self.dilation, self.groups)
476  return F.conv3d(input, self.weight, self.bias, self.stride,
477  self.padding, self.dilation, self.groups)
478 
479 
@weak_module
class _ConvTransposeMixin(object):
    """Mixin shared by the ``ConvTransposeNd`` modules.

    Provides a generic ``forward`` plus the ``_output_padding`` helper that
    resolves an optional user-requested ``output_size`` into the
    per-dimension output padding a transposed convolution needs.
    """

    __constants__ = ['stride', 'padding', 'kernel_size', 'dim_size',
                     'output_padding', 'groups', 'dilation', 'transposed',
                     'bias', 'padding_mode']

    @weak_script_method
    def forward(self, input, output_size=None):
        # type: (Tensor, Optional[List[int]]) -> Tensor
        # BUGFIX: the type comment above was written as `# type(...)` (missing
        # colon), which is not a valid PEP 484 type comment and was ignored.
        output_padding = self._output_padding(input, output_size, self.stride, self.padding, self.kernel_size)
        func = self._backend.ConvNd(
            self.stride, self.padding, self.dilation, self.transposed,
            output_padding, self.groups)
        if self.bias is None:
            return func(input, self.weight)
        else:
            return func(input, self.weight, self.bias)

    @weak_script_method
    def _output_padding(self, input, output_size, stride, padding, kernel_size):
        # type: (Tensor, Optional[List[int]], List[int], List[int], List[int]) -> List[int]
        # Resolve `output_size` into per-dimension output padding.
        # Raises ValueError when output_size has the wrong length or is not
        # reachable with the current stride/padding/kernel configuration.
        if output_size is None:
            ret = _single(self.output_padding)  # converting to list if was not already
        else:
            k = input.dim() - 2
            # Accept either the full (N, C, *spatial) size or just the
            # spatial part.
            if len(output_size) == k + 2:
                output_size = output_size[2:]
            if len(output_size) != k:
                raise ValueError(
                    "output_size must have {} or {} elements (got {})"
                    .format(k, k + 2, len(output_size)))

            min_sizes = torch.jit.annotate(List[int], [])
            max_sizes = torch.jit.annotate(List[int], [])
            for d in range(k):
                # Smallest reachable output for this input; anything up to
                # stride-1 larger is reachable via output_padding.
                dim_size = ((input.size(d + 2) - 1) * stride[d] -
                            2 * padding[d] + kernel_size[d])
                min_sizes.append(dim_size)
                max_sizes.append(min_sizes[d] + stride[d] - 1)

            for i in range(len(output_size)):
                size = output_size[i]
                min_size = min_sizes[i]
                max_size = max_sizes[i]
                if size < min_size or size > max_size:
                    raise ValueError((
                        "requested an output size of {}, but valid sizes range "
                        "from {} to {} (for an input of {})").format(
                            output_size, min_sizes, max_sizes, input.size()[2:]))

            # Output padding is exactly the slack over the minimal size.
            res = torch.jit.annotate(List[int], [])
            for d in range(k):
                res.append(output_size[d] - min_sizes[d])

            ret = res
        return ret
536 
537 
538 @weak_module
540  r"""Applies a 1D transposed convolution operator over an input image
541  composed of several input planes.
542 
543  This module can be seen as the gradient of Conv1d with respect to its input.
544  It is also known as a fractionally-strided convolution or
545  a deconvolution (although it is not an actual deconvolution operation).
546 
547  * :attr:`stride` controls the stride for the cross-correlation.
548 
549  * :attr:`padding` controls the amount of implicit zero-paddings on both
550  sides for ``dilation * (kernel_size - 1) - padding`` number of points. See note
551  below for details.
552 
553  * :attr:`output_padding` controls the additional size added to one side
554  of the output shape. See note below for details.
555 
556  * :attr:`dilation` controls the spacing between the kernel points; also known as the à trous algorithm.
557  It is harder to describe, but this `link`_ has a nice visualization of what :attr:`dilation` does.
558 
559  * :attr:`groups` controls the connections between inputs and outputs.
560  :attr:`in_channels` and :attr:`out_channels` must both be divisible by
561  :attr:`groups`. For example,
562 
563  * At groups=1, all inputs are convolved to all outputs.
564  * At groups=2, the operation becomes equivalent to having two conv
565  layers side by side, each seeing half the input channels,
566  and producing half the output channels, and both subsequently
567  concatenated.
568  * At groups= :attr:`in_channels`, each input channel is convolved with
569  its own set of filters (of size
570  :math:`\left\lfloor\frac{out\_channels}{in\_channels}\right\rfloor`).
571 
572  .. note::
573 
574  Depending of the size of your kernel, several (of the last)
575  columns of the input might be lost, because it is a valid `cross-correlation`_,
576  and not a full `cross-correlation`_.
577  It is up to the user to add proper padding.
578 
579  .. note::
580  The :attr:`padding` argument effectively adds ``dilation * (kernel_size - 1) - padding``
581  amount of zero padding to both sizes of the input. This is set so that
582  when a :class:`~torch.nn.Conv1d` and a :class:`~torch.nn.ConvTranspose1d`
583  are initialized with same parameters, they are inverses of each other in
584  regard to the input and output shapes. However, when ``stride > 1``,
585  :class:`~torch.nn.Conv1d` maps multiple input shapes to the same output
586  shape. :attr:`output_padding` is provided to resolve this ambiguity by
587  effectively increasing the calculated output shape on one side. Note
588  that :attr:`output_padding` is only used to find output shape, but does
589  not actually add zero-padding to output.
590 
591  .. include:: cudnn_deterministic.rst
592 
593  Args:
594  in_channels (int): Number of channels in the input image
595  out_channels (int): Number of channels produced by the convolution
596  kernel_size (int or tuple): Size of the convolving kernel
597  stride (int or tuple, optional): Stride of the convolution. Default: 1
598  padding (int or tuple, optional): ``dilation * (kernel_size - 1) - padding`` zero-padding
599  will be added to both sides of the input. Default: 0
600  output_padding (int or tuple, optional): Additional size added to one side
601  of the output shape. Default: 0
602  groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1
603  bias (bool, optional): If ``True``, adds a learnable bias to the output. Default: ``True``
604  dilation (int or tuple, optional): Spacing between kernel elements. Default: 1
605 
606  Shape:
607  - Input: :math:`(N, C_{in}, L_{in})`
608  - Output: :math:`(N, C_{out}, L_{out})` where
609 
610  .. math::
611  L_{out} = (L_{in} - 1) \times \text{stride} - 2 \times \text{padding} + \text{dilation}
612  \times (\text{kernel\_size} - 1) + \text{output\_padding} + 1
613 
614  Attributes:
615  weight (Tensor): the learnable weights of the module of shape
616  :math:`(\text{in\_channels}, \frac{\text{out\_channels}}{\text{groups}},
617  \text{kernel\_size})`. The values of these weights are sampled from
618  :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
619  :math:`k = \frac{1}{C_\text{in} * \text{kernel\_size}}`
620  bias (Tensor): the learnable bias of the module of shape (out_channels).
621  If :attr:`bias` is ``True``, then the values of these weights are
622  sampled from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
623  :math:`k = \frac{1}{C_\text{in} * \text{kernel\_size}}`
624  """
625 
626  def __init__(self, in_channels, out_channels, kernel_size, stride=1,
627  padding=0, output_padding=0, groups=1, bias=True,
628  dilation=1, padding_mode='zeros'):
629  kernel_size = _single(kernel_size)
630  stride = _single(stride)
631  padding = _single(padding)
632  dilation = _single(dilation)
633  output_padding = _single(output_padding)
634  super(ConvTranspose1d, self).__init__(
635  in_channels, out_channels, kernel_size, stride, padding, dilation,
636  True, output_padding, groups, bias, padding_mode)
637 
638  @weak_script_method
639  def forward(self, input, output_size=None):
640  # type: (Tensor, Optional[List[int]]) -> Tensor
641  if self.padding_mode != 'zeros':
642  raise ValueError('Only `zeros` padding mode is supported for ConvTranspose1d')
643 
644  output_padding = self._output_padding(input, output_size, self.stride, self.padding, self.kernel_size)
645  return F.conv_transpose1d(
646  input, self.weight, self.bias, self.stride, self.padding,
647  output_padding, self.groups, self.dilation)
648 
649 
650 @weak_module
652  r"""Applies a 2D transposed convolution operator over an input image
653  composed of several input planes.
654 
655  This module can be seen as the gradient of Conv2d with respect to its input.
656  It is also known as a fractionally-strided convolution or
657  a deconvolution (although it is not an actual deconvolution operation).
658 
659  * :attr:`stride` controls the stride for the cross-correlation.
660 
661  * :attr:`padding` controls the amount of implicit zero-paddings on both
662  sides for ``dilation * (kernel_size - 1) - padding`` number of points. See note
663  below for details.
664 
665  * :attr:`output_padding` controls the additional size added to one side
666  of the output shape. See note below for details.
667 
668  * :attr:`dilation` controls the spacing between the kernel points; also known as the à trous algorithm.
669  It is harder to describe, but this `link`_ has a nice visualization of what :attr:`dilation` does.
670 
671  * :attr:`groups` controls the connections between inputs and outputs.
672  :attr:`in_channels` and :attr:`out_channels` must both be divisible by
673  :attr:`groups`. For example,
674 
675  * At groups=1, all inputs are convolved to all outputs.
676  * At groups=2, the operation becomes equivalent to having two conv
677  layers side by side, each seeing half the input channels,
678  and producing half the output channels, and both subsequently
679  concatenated.
680  * At groups= :attr:`in_channels`, each input channel is convolved with
681  its own set of filters (of size
682  :math:`\left\lfloor\frac{out\_channels}{in\_channels}\right\rfloor`).
683 
684  The parameters :attr:`kernel_size`, :attr:`stride`, :attr:`padding`, :attr:`output_padding`
685  can either be:
686 
687  - a single ``int`` -- in which case the same value is used for the height and width dimensions
688  - a ``tuple`` of two ints -- in which case, the first `int` is used for the height dimension,
689  and the second `int` for the width dimension
690 
691  .. note::
692 
693  Depending of the size of your kernel, several (of the last)
694  columns of the input might be lost, because it is a valid `cross-correlation`_,
695  and not a full `cross-correlation`_.
696  It is up to the user to add proper padding.
697 
698  .. note::
699  The :attr:`padding` argument effectively adds ``dilation * (kernel_size - 1) - padding``
700  amount of zero padding to both sizes of the input. This is set so that
701  when a :class:`~torch.nn.Conv2d` and a :class:`~torch.nn.ConvTranspose2d`
702  are initialized with same parameters, they are inverses of each other in
703  regard to the input and output shapes. However, when ``stride > 1``,
704  :class:`~torch.nn.Conv2d` maps multiple input shapes to the same output
705  shape. :attr:`output_padding` is provided to resolve this ambiguity by
706  effectively increasing the calculated output shape on one side. Note
707  that :attr:`output_padding` is only used to find output shape, but does
708  not actually add zero-padding to output.
709 
710  .. include:: cudnn_deterministic.rst
711 
712  Args:
713  in_channels (int): Number of channels in the input image
714  out_channels (int): Number of channels produced by the convolution
715  kernel_size (int or tuple): Size of the convolving kernel
716  stride (int or tuple, optional): Stride of the convolution. Default: 1
717  padding (int or tuple, optional): ``dilation * (kernel_size - 1) - padding`` zero-padding
718  will be added to both sides of each dimension in the input. Default: 0
719  output_padding (int or tuple, optional): Additional size added to one side
720  of each dimension in the output shape. Default: 0
721  groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1
722  bias (bool, optional): If ``True``, adds a learnable bias to the output. Default: ``True``
723  dilation (int or tuple, optional): Spacing between kernel elements. Default: 1
724 
725  Shape:
726  - Input: :math:`(N, C_{in}, H_{in}, W_{in})`
727  - Output: :math:`(N, C_{out}, H_{out}, W_{out})` where
728 
729  .. math::
730  H_{out} = (H_{in} - 1) \times \text{stride}[0] - 2 \times \text{padding}[0] + \text{dilation}[0]
731  \times (\text{kernel\_size}[0] - 1) + \text{output\_padding}[0] + 1
732  .. math::
733  W_{out} = (W_{in} - 1) \times \text{stride}[1] - 2 \times \text{padding}[1] + \text{dilation}[1]
734  \times (\text{kernel\_size}[1] - 1) + \text{output\_padding}[1] + 1
735 
736  Attributes:
737  weight (Tensor): the learnable weights of the module of shape
738  :math:`(\text{in\_channels}, \frac{\text{out\_channels}}{\text{groups}},
739  \text{kernel\_size[0]}, \text{kernel\_size[1]})`.
740  The values of these weights are sampled from
741  :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
742  :math:`k = \frac{1}{C_\text{in} * \prod_{i=0}^{1}\text{kernel\_size}[i]}`
743  bias (Tensor): the learnable bias of the module of shape (out_channels)
744  If :attr:`bias` is ``True``, then the values of these weights are
745  sampled from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
746  :math:`k = \frac{1}{C_\text{in} * \prod_{i=0}^{1}\text{kernel\_size}[i]}`
747 
748  Examples::
749 
750  >>> # With square kernels and equal stride
751  >>> m = nn.ConvTranspose2d(16, 33, 3, stride=2)
752  >>> # non-square kernels and unequal stride and with padding
753  >>> m = nn.ConvTranspose2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2))
754  >>> input = torch.randn(20, 16, 50, 100)
755  >>> output = m(input)
756  >>> # exact output size can be also specified as an argument
757  >>> input = torch.randn(1, 16, 12, 12)
758  >>> downsample = nn.Conv2d(16, 16, 3, stride=2, padding=1)
759  >>> upsample = nn.ConvTranspose2d(16, 16, 3, stride=2, padding=1)
760  >>> h = downsample(input)
761  >>> h.size()
762  torch.Size([1, 16, 6, 6])
763  >>> output = upsample(h, output_size=input.size())
764  >>> output.size()
765  torch.Size([1, 16, 12, 12])
766 
767  .. _cross-correlation:
768  https://en.wikipedia.org/wiki/Cross-correlation
769 
770  .. _link:
771  https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md
772  """
773 
774  def __init__(self, in_channels, out_channels, kernel_size, stride=1,
775  padding=0, output_padding=0, groups=1, bias=True,
776  dilation=1, padding_mode='zeros'):
777  kernel_size = _pair(kernel_size)
778  stride = _pair(stride)
779  padding = _pair(padding)
780  dilation = _pair(dilation)
781  output_padding = _pair(output_padding)
782  super(ConvTranspose2d, self).__init__(
783  in_channels, out_channels, kernel_size, stride, padding, dilation,
784  True, output_padding, groups, bias, padding_mode)
785 
    @weak_script_method
    def forward(self, input, output_size=None):
        # type: (Tensor, Optional[List[int]]) -> Tensor
        """Apply the 2D transposed convolution to ``input``.

        ``output_size``, when given, resolves the output-shape ambiguity
        that arises for ``stride > 1`` (see the class docstring) by
        choosing an appropriate ``output_padding``.
        """
        # Non-'zeros' padding modes are only meaningful for the forward
        # convolutions; reject them explicitly here.
        if self.padding_mode != 'zeros':
            raise ValueError('Only `zeros` padding mode is supported for ConvTranspose2d')

        # Translate the requested output_size (if any) into the one-sided
        # extra padding expected by F.conv_transpose2d.
        output_padding = self._output_padding(input, output_size, self.stride, self.padding, self.kernel_size)

        return F.conv_transpose2d(
            input, self.weight, self.bias, self.stride, self.padding,
            output_padding, self.groups, self.dilation)
797 
798 
799 @weak_module
801  r"""Applies a 3D transposed convolution operator over an input image composed of several input
802  planes.
803  The transposed convolution operator multiplies each input value element-wise by a learnable kernel,
804  and sums over the outputs from all input feature planes.
805 
806  This module can be seen as the gradient of Conv3d with respect to its input.
807  It is also known as a fractionally-strided convolution or
808  a deconvolution (although it is not an actual deconvolution operation).
809 
810  * :attr:`stride` controls the stride for the cross-correlation.
811 
812  * :attr:`padding` controls the amount of implicit zero-paddings on both
813  sides for ``dilation * (kernel_size - 1) - padding`` number of points. See note
814  below for details.
815 
816  * :attr:`output_padding` controls the additional size added to one side
817  of the output shape. See note below for details.
818 
819  * :attr:`dilation` controls the spacing between the kernel points; also known as the à trous algorithm.
820  It is harder to describe, but this `link`_ has a nice visualization of what :attr:`dilation` does.
821 
822  * :attr:`groups` controls the connections between inputs and outputs.
823  :attr:`in_channels` and :attr:`out_channels` must both be divisible by
824  :attr:`groups`. For example,
825 
826  * At groups=1, all inputs are convolved to all outputs.
827  * At groups=2, the operation becomes equivalent to having two conv
828  layers side by side, each seeing half the input channels,
829  and producing half the output channels, and both subsequently
830  concatenated.
831  * At groups= :attr:`in_channels`, each input channel is convolved with
832  its own set of filters (of size
833  :math:`\left\lfloor\frac{out\_channels}{in\_channels}\right\rfloor`).
834 
835  The parameters :attr:`kernel_size`, :attr:`stride`, :attr:`padding`, :attr:`output_padding`
836  can either be:
837 
838  - a single ``int`` -- in which case the same value is used for the depth, height and width dimensions
839  - a ``tuple`` of three ints -- in which case, the first `int` is used for the depth dimension,
840  the second `int` for the height dimension and the third `int` for the width dimension
841 
842  .. note::
843 
844  Depending on the size of your kernel, several (of the last)
845  columns of the input might be lost, because it is a valid `cross-correlation`_,
846  and not a full `cross-correlation`_.
847  It is up to the user to add proper padding.
848 
849  .. note::
850  The :attr:`padding` argument effectively adds ``dilation * (kernel_size - 1) - padding``
851  amount of zero padding to both sides of the input. This is set so that
852  when a :class:`~torch.nn.Conv3d` and a :class:`~torch.nn.ConvTranspose3d`
853  are initialized with same parameters, they are inverses of each other in
854  regard to the input and output shapes. However, when ``stride > 1``,
855  :class:`~torch.nn.Conv3d` maps multiple input shapes to the same output
856  shape. :attr:`output_padding` is provided to resolve this ambiguity by
857  effectively increasing the calculated output shape on one side. Note
858  that :attr:`output_padding` is only used to find output shape, but does
859  not actually add zero-padding to output.
860 
861  .. include:: cudnn_deterministic.rst
862 
863  Args:
864  in_channels (int): Number of channels in the input image
865  out_channels (int): Number of channels produced by the convolution
866  kernel_size (int or tuple): Size of the convolving kernel
867  stride (int or tuple, optional): Stride of the convolution. Default: 1
868  padding (int or tuple, optional): ``dilation * (kernel_size - 1) - padding`` zero-padding
869  will be added to both sides of each dimension in the input. Default: 0
870  output_padding (int or tuple, optional): Additional size added to one side
871  of each dimension in the output shape. Default: 0
872  groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1
873  bias (bool, optional): If ``True``, adds a learnable bias to the output. Default: ``True``
874  dilation (int or tuple, optional): Spacing between kernel elements. Default: 1
875 
876  Shape:
877  - Input: :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`
878  - Output: :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})` where
879 
880  .. math::
881  D_{out} = (D_{in} - 1) \times \text{stride}[0] - 2 \times \text{padding}[0] + \text{dilation}[0]
882  \times (\text{kernel\_size}[0] - 1) + \text{output\_padding}[0] + 1
883  .. math::
884  H_{out} = (H_{in} - 1) \times \text{stride}[1] - 2 \times \text{padding}[1] + \text{dilation}[1]
885  \times (\text{kernel\_size}[1] - 1) + \text{output\_padding}[1] + 1
886  .. math::
887  W_{out} = (W_{in} - 1) \times \text{stride}[2] - 2 \times \text{padding}[2] + \text{dilation}[2]
888  \times (\text{kernel\_size}[2] - 1) + \text{output\_padding}[2] + 1
889 
890 
891  Attributes:
892  weight (Tensor): the learnable weights of the module of shape
893  :math:`(\text{in\_channels}, \frac{\text{out\_channels}}{\text{groups}},
894  \text{kernel\_size[0]}, \text{kernel\_size[1]}, \text{kernel\_size[2]})`.
895  The values of these weights are sampled from
896  :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
897  :math:`k = \frac{1}{C_\text{in} * \prod_{i=0}^{2}\text{kernel\_size}[i]}`
898  bias (Tensor): the learnable bias of the module of shape (out_channels)
899  If :attr:`bias` is ``True``, then the values of these weights are
900  sampled from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
901  :math:`k = \frac{1}{C_\text{in} * \prod_{i=0}^{2}\text{kernel\_size}[i]}`
902 
903  Examples::
904 
905  >>> # With square kernels and equal stride
906  >>> m = nn.ConvTranspose3d(16, 33, 3, stride=2)
907  >>> # non-square kernels and unequal stride and with padding
908  >>> m = nn.ConvTranspose3d(16, 33, (3, 5, 2), stride=(2, 1, 1), padding=(0, 4, 2))
909  >>> input = torch.randn(20, 16, 10, 50, 100)
910  >>> output = m(input)
911 
912  .. _cross-correlation:
913  https://en.wikipedia.org/wiki/Cross-correlation
914 
915  .. _link:
916  https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md
917  """
918 
919  def __init__(self, in_channels, out_channels, kernel_size, stride=1,
920  padding=0, output_padding=0, groups=1, bias=True,
921  dilation=1, padding_mode='zeros'):
922  kernel_size = _triple(kernel_size)
923  stride = _triple(stride)
924  padding = _triple(padding)
925  dilation = _triple(dilation)
926  output_padding = _triple(output_padding)
927  super(ConvTranspose3d, self).__init__(
928  in_channels, out_channels, kernel_size, stride, padding, dilation,
929  True, output_padding, groups, bias, padding_mode)
930 
    @weak_script_method
    def forward(self, input, output_size=None):
        # type: (Tensor, Optional[List[int]]) -> Tensor
        """Apply the 3D transposed convolution to ``input``.

        ``output_size``, when given, resolves the output-shape ambiguity
        that arises for ``stride > 1`` (see the class docstring) by
        choosing an appropriate ``output_padding``.
        """
        # Non-'zeros' padding modes are only meaningful for the forward
        # convolutions; reject them explicitly here.
        if self.padding_mode != 'zeros':
            raise ValueError('Only `zeros` padding mode is supported for ConvTranspose3d')

        # Translate the requested output_size (if any) into the one-sided
        # extra padding expected by F.conv_transpose3d.
        output_padding = self._output_padding(input, output_size, self.stride, self.padding, self.kernel_size)

        return F.conv_transpose3d(
            input, self.weight, self.bias, self.stride, self.padding,
            output_padding, self.groups, self.dilation)
942 
943 
944 # TODO: Conv2dLocal
945 # TODO: Conv2dMap
946 # TODO: ConvTranspose2dMap
def annotate(the_type, the_value)
Definition: __init__.py:1560
def reset_parameters(self)
Definition: conv.py:47
def _output_padding(self, input, output_size, stride, padding, kernel_size)
Definition: conv.py:499