# Caffe2 - Python API: a deep learning, cross-platform ML framework
# fold.py
1 # coding=utf-8
2 from .module import Module
3 from .. import functional as F
4 from ..._jit_internal import weak_module, weak_script_method
5
6
@weak_module
class Fold(Module):
    r"""Combines an array of sliding local blocks into a large containing
    tensor.

    Consider a batched :attr:`input` tensor containing sliding local blocks,
    e.g., patches of images, of shape :math:`(N, C \times \prod(\text{kernel\_size}), L)`,
    where :math:`N` is batch dimension, :math:`C \times \prod(\text{kernel\_size})`
    is the number of values within a block (a block has :math:`\prod(\text{kernel\_size})`
    spatial locations each containing a :math:`C`-channeled vector), and
    :math:`L` is the total number of blocks. (This is exactly the
    same specification as the output shape of :class:`~torch.nn.Unfold`.) This
    operation combines these local blocks into the large :attr:`output` tensor
    of shape :math:`(N, C, \text{output\_size}[0], \text{output\_size}[1], \dots)`
    by summing the overlapping values. Similar to :class:`~torch.nn.Unfold`, the
    arguments must satisfy

    .. math::
        L = \prod_d \left\lfloor\frac{\text{output\_size}[d] + 2 \times \text{padding}[d] %
            - \text{dilation}[d] \times (\text{kernel\_size}[d] - 1) - 1}{\text{stride}[d]} + 1\right\rfloor,

    where :math:`d` is over all spatial dimensions.

    * :attr:`output_size` describes the spatial shape of the large containing
      tensor of the sliding local blocks. It is useful to resolve the ambiguity
      when multiple input shapes map to same number of sliding blocks, e.g.,
      with ``stride > 0``.

    The :attr:`padding`, :attr:`stride` and :attr:`dilation` arguments specify
    how the sliding blocks are retrieved.

    * :attr:`stride` controls the stride for the sliding blocks.

    * :attr:`padding` controls the amount of implicit zero-paddings on both
      sides for :attr:`padding` number of points for each dimension before
      reshaping.

    * :attr:`dilation` controls the spacing between the kernel points; also known as the à trous algorithm.
      It is harder to describe, but this `link`_ has a nice visualization of what :attr:`dilation` does.

    Args:
        output_size (int or tuple): the shape of the spatial dimensions of the
                                    output (i.e., ``output.sizes()[2:]``)
        kernel_size (int or tuple): the size of the sliding blocks
        stride (int or tuple): the stride of the sliding blocks in the input
                               spatial dimensions. Default: 1
        padding (int or tuple, optional): implicit zero padding to be added on
                                          both sides of input. Default: 0
        dilation (int or tuple, optional): a parameter that controls the
                                           stride of elements within the
                                           neighborhood. Default: 1

    * If :attr:`output_size`, :attr:`kernel_size`, :attr:`dilation`,
      :attr:`padding` or :attr:`stride` is an int or a tuple of length 1 then
      their values will be replicated across all spatial dimensions.

    * For the case of two output spatial dimensions this operation is sometimes
      called ``col2im``.

    .. note::
        :class:`~torch.nn.Fold` calculates each combined value in the resulting
        large tensor by summing all values from all containing blocks.
        :class:`~torch.nn.Unfold` extracts the values in the local blocks by
        copying from the large tensor. So, if the blocks overlap, they are not
        inverses of each other.

    .. warning::
        Currently, only 4-D output tensors (batched image-like tensors) are
        supported.

    Shape:
        - Input: :math:`(N, C \times \prod(\text{kernel\_size}), L)`
        - Output: :math:`(N, C, \text{output\_size}[0], \text{output\_size}[1], \dots)` as described above

    Examples::

        >>> fold = nn.Fold(output_size=(4, 5), kernel_size=(2, 2))
        >>> input = torch.randn(1, 3 * 2 * 2, 12)
        >>> output = fold(input)
        >>> output.size()
        torch.Size([1, 3, 4, 5])

    .. _link:
        https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md

    """
    __constants__ = ['output_size', 'kernel_size', 'dilation', 'padding',
                     'stride']

    def __init__(self, output_size, kernel_size, dilation=1, padding=0, stride=1):
        super(Fold, self).__init__()
        self.output_size = output_size
        self.kernel_size = kernel_size
        self.dilation = dilation
        # NOTE(review): the extracted source had dropped this assignment, which
        # would make forward() and extra_repr() raise AttributeError — restored.
        self.padding = padding
        self.stride = stride

    @weak_script_method
    def forward(self, input):
        # Delegate to the functional implementation; argument order matches
        # F.fold(input, output_size, kernel_size, dilation, padding, stride).
        return F.fold(input, self.output_size, self.kernel_size, self.dilation,
                      self.padding, self.stride)

    def extra_repr(self):
        # Render all configuration attributes for repr(); keys in the format
        # string are looked up in the instance __dict__.
        return 'output_size={output_size}, kernel_size={kernel_size}, ' \
            'dilation={dilation}, padding={padding}, stride={stride}'.format(
                **self.__dict__
            )
114
115
116 @weak_module
117 class Unfold(Module):
118  r"""Extracts sliding local blocks from a batched input tensor.
119
120  Consider an batched :attr:input tensor of shape :math:(N, C, *),
121  where :math:N is the batch dimension, :math:C is the channel dimension,
122  and :math:* represent arbitrary spatial dimensions. This operation flattens
123  each sliding :attr:kernel_size-sized block within the spatial dimensions
124  of :attr:input into a column (i.e., last dimension) of a 3-D :attr:output
125  tensor of shape :math:(N, C \times \prod(\text{kernel\_size}), L), where
126  :math:C \times \prod(\text{kernel\_size}) is the total number of values
127  within each block (a block has :math:\prod(\text{kernel\_size}) spatial
128  locations each containing a :math:C-channeled vector), and :math:L is
129  the total number of such blocks:
130
131  .. math::
132  L = \prod_d \left\lfloor\frac{\text{spatial\_size}[d] + 2 \times \text{padding}[d] %
133  - \text{dilation}[d] \times (\text{kernel\_size}[d] - 1) - 1}{\text{stride}[d]} + 1\right\rfloor,
134
135  where :math:\text{spatial\_size} is formed by the spatial dimensions
136  of :attr:input (:math:* above), and :math:d is over all spatial
137  dimensions.
138
139  Therefore, indexing :attr:output at the last dimension (column dimension)
140  gives all values within a certain block.
141
142  The :attr:padding, :attr:stride and :attr:dilation arguments specify
143  how the sliding blocks are retrieved.
144
145  * :attr:stride controls the stride for the sliding blocks.
146
147  * :attr:padding controls the amount of implicit zero-paddings on both
148  sides for :attr:padding number of points for each dimension before
149  reshaping.
150
151  * :attr:dilation controls the spacing between the kernel points; also known as the à trous algorithm.
152  It is harder to describe, but this link_ has a nice visualization of what :attr:dilation does.
153
154  Args:
155  kernel_size (int or tuple): the size of the sliding blocks
156  stride (int or tuple, optional): the stride of the sliding blocks in the input
157  spatial dimensions. Default: 1
159  both sides of input. Default: 0
160  dilation (int or tuple, optional): a parameter that controls the
161  stride of elements within the
162  neighborhood. Default: 1
163
164  * If :attr:kernel_size, :attr:dilation, :attr:padding or
165  :attr:stride is an int or a tuple of length 1, their values will be
166  replicated across all spatial dimensions.
167
168  * For the case of two input spatial dimensions this operation is sometimes
169  called im2col.
170
171  .. note::
172  :class:~torch.nn.Fold calculates each combined value in the resulting
173  large tensor by summing all values from all containing blocks.
174  :class:~torch.nn.Unfold extracts the values in the local blocks by
175  copying from the large tensor. So, if the blocks overlap, they are not
176  inverses of each other.
177
178  .. warning::
179  Currently, only 4-D input tensors (batched image-like tensors) are
180  supported.
181
182  Shape:
183  - Input: :math:(N, C, *)
184  - Output: :math:(N, C \times \prod(\text{kernel\_size}), L) as described above
185
186  Examples::
187
188  >>> unfold = nn.Unfold(kernel_size=(2, 3))
189  >>> input = torch.randn(2, 5, 3, 4)
190  >>> output = unfold(input)
191  >>> # each patch contains 30 values (2x3=6 vectors, each of 5 channels)
192  >>> # 4 blocks (2x3 kernels) in total in the 3x4 input
193  >>> output.size()
194  torch.Size([2, 30, 4])
195
196  >>> # Convolution is equivalent with Unfold + Matrix Multiplication + Fold (or view to output shape)
197  >>> inp = torch.randn(1, 3, 10, 12)
198  >>> w = torch.randn(2, 3, 4, 5)
199  >>> inp_unf = torch.nn.functional.unfold(inp, (4, 5))
200  >>> out_unf = inp_unf.transpose(1, 2).matmul(w.view(w.size(0), -1).t()).transpose(1, 2)
201  >>> out = torch.nn.functional.fold(out_unf, (7, 8), (1, 1))
202  >>> # or equivalently (and avoiding a copy),
203  >>> # out = out_unf.view(1, 2, 7, 8)
204  >>> (torch.nn.functional.conv2d(inp, w) - out).abs().max()
205  tensor(1.9073e-06)
206
209
210  """
211  __constants__ = ['kernel_size', 'dilation', 'padding', 'stride']
212
213  def __init__(self, kernel_size, dilation=1, padding=0, stride=1):
214  super(Unfold, self).__init__()
215  self.kernel_size = kernel_size
216  self.dilation = dilation