fold.py
# coding=utf-8
from .module import Module
from .. import functional as F
from ..._jit_internal import weak_module, weak_script_method


@weak_module
class Fold(Module):
    r"""Combines an array of sliding local blocks into a large containing
    tensor.

    Consider a batched :attr:`input` tensor containing sliding local blocks,
    e.g., patches of images, of shape :math:`(N, C \times \prod(\text{kernel\_size}), L)`,
    where :math:`N` is the batch dimension, :math:`C \times \prod(\text{kernel\_size})`
    is the number of values within a block (a block has :math:`\prod(\text{kernel\_size})`
    spatial locations, each containing a :math:`C`-channeled vector), and
    :math:`L` is the total number of blocks. (This is exactly the
    same specification as the output shape of :class:`~torch.nn.Unfold`.) This
    operation combines these local blocks into the large :attr:`output` tensor
    of shape :math:`(N, C, \text{output\_size}[0], \text{output\_size}[1], \dots)`
    by summing the overlapping values. Similar to :class:`~torch.nn.Unfold`, the
    arguments must satisfy

    .. math::
        L = \prod_d \left\lfloor\frac{\text{output\_size}[d] + 2 \times \text{padding}[d] %
            - \text{dilation}[d] \times (\text{kernel\_size}[d] - 1) - 1}{\text{stride}[d]} + 1\right\rfloor,

    where :math:`d` is over all spatial dimensions.
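
    As a quick sanity check of this formula (a minimal sketch; the sizes match
    the example further below, and the loop is purely illustrative)::

        >>> from math import floor
        >>> # output_size=(4, 5), kernel_size=(2, 2), padding=0, dilation=1, stride=1
        >>> L = 1
        >>> for out, k in zip((4, 5), (2, 2)):
        ...     L *= floor((out + 2 * 0 - 1 * (k - 1) - 1) / 1 + 1)
        >>> L
        12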

    * :attr:`output_size` describes the spatial shape of the large containing
      tensor of the sliding local blocks. It is useful to resolve the ambiguity
      when multiple input shapes map to the same number of sliding blocks,
      e.g., with ``stride > 0`` (see the sketch after the list below).

    The :attr:`padding`, :attr:`stride` and :attr:`dilation` arguments specify
    how the sliding blocks are retrieved.

    * :attr:`stride` controls the stride for the sliding blocks.

    * :attr:`padding` controls the amount of implicit zero-padding: before
      reshaping, :attr:`padding` points are added on both sides of each
      dimension.

    * :attr:`dilation` controls the spacing between the kernel points; also
      known as the à trous algorithm. It is harder to describe, but this
      `link`_ has a nice visualization of what :attr:`dilation` does.
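
    For a concrete view of that ambiguity (a minimal sketch; the two output
    sizes are assumptions chosen so that a :math:`2 \times 2` kernel yields
    the same :math:`L = 12` for both)::

        >>> inp = torch.randn(1, 3 * 2 * 2, 12)
        >>> nn.Fold(output_size=(4, 5), kernel_size=(2, 2))(inp).size()
        torch.Size([1, 3, 4, 5])
        >>> nn.Fold(output_size=(5, 4), kernel_size=(2, 2))(inp).size()
        torch.Size([1, 3, 5, 4])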

    Args:
        output_size (int or tuple): the shape of the spatial dimensions of the
                                    output (i.e., ``output.sizes()[2:]``)
        kernel_size (int or tuple): the size of the sliding blocks
        stride (int or tuple, optional): the stride of the sliding blocks in
                                         the input spatial dimensions.
                                         Default: 1
        padding (int or tuple, optional): implicit zero padding to be added on
                                          both sides of input. Default: 0
        dilation (int or tuple, optional): a parameter that controls the
                                           stride of elements within the
                                           neighborhood. Default: 1

    * If :attr:`output_size`, :attr:`kernel_size`, :attr:`dilation`,
      :attr:`padding` or :attr:`stride` is an int or a tuple of length 1, then
      their values will be replicated across all spatial dimensions.

    * For the case of two output spatial dimensions this operation is
      sometimes called ``col2im``.

    .. note::
        :class:`~torch.nn.Fold` calculates each combined value in the resulting
        large tensor by summing all values from all containing blocks.
        :class:`~torch.nn.Unfold` extracts the values in the local blocks by
        copying from the large tensor. So, if the blocks overlap, they are not
        inverses of each other.
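
        The input of an overlapping :class:`~torch.nn.Unfold` can still be
        recovered by dividing the folded result by the per-position overlap
        counts, which can themselves be computed by folding an unfolded
        tensor of ones (a minimal sketch; ``fold_params`` is an illustrative
        name)::

            >>> x = torch.randn(1, 3, 4, 4)
            >>> fold_params = dict(kernel_size=(2, 2), stride=1)
            >>> patches = torch.nn.functional.unfold(x, **fold_params)
            >>> ones = torch.ones_like(x)
            >>> counts = torch.nn.functional.fold(
            ...     torch.nn.functional.unfold(ones, **fold_params),
            ...     x.shape[2:], **fold_params)
            >>> recovered = torch.nn.functional.fold(
            ...     patches, x.shape[2:], **fold_params) / counts
            >>> torch.allclose(recovered, x)
            True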

    .. warning::
        Currently, only 4-D output tensors (batched image-like tensors) are
        supported.

    Shape:
        - Input: :math:`(N, C \times \prod(\text{kernel\_size}), L)`
        - Output: :math:`(N, C, \text{output\_size}[0], \text{output\_size}[1], \dots)` as described above

    Examples::

        >>> fold = nn.Fold(output_size=(4, 5), kernel_size=(2, 2))
        >>> input = torch.randn(1, 3 * 2 * 2, 12)
        >>> output = fold(input)
        >>> output.size()
        torch.Size([1, 3, 4, 5])

    .. _link:
        https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md

    """
    __constants__ = ['output_size', 'kernel_size', 'dilation', 'padding',
                     'stride']

    def __init__(self, output_size, kernel_size, dilation=1, padding=0, stride=1):
        super(Fold, self).__init__()
        self.output_size = output_size
        self.kernel_size = kernel_size
        self.dilation = dilation
        self.padding = padding
        self.stride = stride

    @weak_script_method
    def forward(self, input):
        return F.fold(input, self.output_size, self.kernel_size, self.dilation,
                      self.padding, self.stride)

    def extra_repr(self):
        return 'output_size={output_size}, kernel_size={kernel_size}, ' \
            'dilation={dilation}, padding={padding}, stride={stride}'.format(
                **self.__dict__
            )


@weak_module
class Unfold(Module):
    r"""Extracts sliding local blocks from a batched input tensor.

    Consider a batched :attr:`input` tensor of shape :math:`(N, C, *)`,
    where :math:`N` is the batch dimension, :math:`C` is the channel dimension,
    and :math:`*` represents arbitrary spatial dimensions. This operation
    flattens each sliding :attr:`kernel_size`-sized block within the spatial
    dimensions of :attr:`input` into a column (i.e., last dimension) of a 3-D
    :attr:`output` tensor of shape
    :math:`(N, C \times \prod(\text{kernel\_size}), L)`, where
    :math:`C \times \prod(\text{kernel\_size})` is the total number of values
    within each block (a block has :math:`\prod(\text{kernel\_size})` spatial
    locations, each containing a :math:`C`-channeled vector), and :math:`L` is
    the total number of such blocks:

    .. math::
        L = \prod_d \left\lfloor\frac{\text{spatial\_size}[d] + 2 \times \text{padding}[d] %
            - \text{dilation}[d] \times (\text{kernel\_size}[d] - 1) - 1}{\text{stride}[d]} + 1\right\rfloor,

    where :math:`\text{spatial\_size}` is formed by the spatial dimensions
    of :attr:`input` (:math:`*` above), and :math:`d` is over all spatial
    dimensions.
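
    For instance, in the first example below (a :math:`3 \times 4` input
    unfolded with a :math:`2 \times 3` kernel and default arguments), this
    gives :math:`L = (3 - 2 + 1) \times (4 - 3 + 1) = 2 \times 2 = 4` blocks.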

    Therefore, indexing :attr:`output` at the last dimension (column dimension)
    gives all values within a certain block.

    The :attr:`padding`, :attr:`stride` and :attr:`dilation` arguments specify
    how the sliding blocks are retrieved.

    * :attr:`stride` controls the stride for the sliding blocks.

    * :attr:`padding` controls the amount of implicit zero-padding: before
      reshaping, :attr:`padding` points are added on both sides of each
      dimension (see the sketch after this list).

    * :attr:`dilation` controls the spacing between the kernel points; also
      known as the à trous algorithm. It is harder to describe, but this
      `link`_ has a nice visualization of what :attr:`dilation` does.
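
    As a sketch of how :attr:`padding` and :attr:`stride` change the block
    count (the parameter values here are assumptions, applied to the same
    :math:`3 \times 4` input used in the examples below)::

        >>> unfold = nn.Unfold(kernel_size=(2, 3), padding=1, stride=2)
        >>> # L = (floor((3 + 2 - 1 - 1) / 2) + 1) * (floor((4 + 2 - 2 - 1) / 2) + 1)
        >>> #   = 2 * 2 = 4
        >>> unfold(torch.randn(2, 5, 3, 4)).size()
        torch.Size([2, 30, 4])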

    Args:
        kernel_size (int or tuple): the size of the sliding blocks
        stride (int or tuple, optional): the stride of the sliding blocks in
                                         the input spatial dimensions.
                                         Default: 1
        padding (int or tuple, optional): implicit zero padding to be added on
                                          both sides of input. Default: 0
        dilation (int or tuple, optional): a parameter that controls the
                                           stride of elements within the
                                           neighborhood. Default: 1

    * If :attr:`kernel_size`, :attr:`dilation`, :attr:`padding` or
      :attr:`stride` is an int or a tuple of length 1, their values will be
      replicated across all spatial dimensions.

    * For the case of two input spatial dimensions this operation is
      sometimes called ``im2col``.

    .. note::
        :class:`~torch.nn.Fold` calculates each combined value in the resulting
        large tensor by summing all values from all containing blocks.
        :class:`~torch.nn.Unfold` extracts the values in the local blocks by
        copying from the large tensor. So, if the blocks overlap, they are not
        inverses of each other.

    .. warning::
        Currently, only 4-D input tensors (batched image-like tensors) are
        supported.

    Shape:
        - Input: :math:`(N, C, *)`
        - Output: :math:`(N, C \times \prod(\text{kernel\_size}), L)` as described above

    Examples::

        >>> unfold = nn.Unfold(kernel_size=(2, 3))
        >>> input = torch.randn(2, 5, 3, 4)
        >>> output = unfold(input)
        >>> # each patch contains 30 values (2x3=6 vectors, each of 5 channels)
        >>> # 4 blocks (2x3 kernels) in total in the 3x4 input
        >>> output.size()
        torch.Size([2, 30, 4])

        >>> # Convolution is equivalent to Unfold + Matrix Multiplication + Fold (or view to output shape)
        >>> inp = torch.randn(1, 3, 10, 12)
        >>> w = torch.randn(2, 3, 4, 5)
        >>> inp_unf = torch.nn.functional.unfold(inp, (4, 5))
        >>> out_unf = inp_unf.transpose(1, 2).matmul(w.view(w.size(0), -1).t()).transpose(1, 2)
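        >>> # a 4x5 kernel over a 10x12 input gives output spatial size (10-4+1, 12-5+1) = (7, 8)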
        >>> out = torch.nn.functional.fold(out_unf, (7, 8), (1, 1))
        >>> # or equivalently (and avoiding a copy),
        >>> # out = out_unf.view(1, 2, 7, 8)
        >>> (torch.nn.functional.conv2d(inp, w) - out).abs().max()
        tensor(1.9073e-06)

    .. _link:
        https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md

    """
    __constants__ = ['kernel_size', 'dilation', 'padding', 'stride']

    def __init__(self, kernel_size, dilation=1, padding=0, stride=1):
        super(Unfold, self).__init__()
        self.kernel_size = kernel_size
        self.dilation = dilation
        self.padding = padding
        self.stride = stride

    @weak_script_method
    def forward(self, input):
        return F.unfold(input, self.kernel_size, self.dilation,
                        self.padding, self.stride)

    def extra_repr(self):
        return 'kernel_size={kernel_size}, dilation={dilation}, padding={padding},' \
            ' stride={stride}'.format(**self.__dict__)