Caffe2 - Python API
A deep learning, cross platform ML framework
pooling.py
1 import torch
2 
3 from .module import Module
4 from .utils import _single, _pair, _triple
5 from .. import functional as F
6 from ..._jit_internal import weak_module, weak_script_method
7 
8 
9 @weak_module
class _MaxPoolNd(Module):
    """Shared base for the MaxPool1d/2d/3d modules: stores the pooling
    hyper-parameters and renders them in ``repr``."""

    __constants__ = ['kernel_size', 'stride', 'padding', 'dilation',
                     'return_indices', 'ceil_mode']

    def __init__(self, kernel_size, stride=None, padding=0, dilation=1,
                 return_indices=False, ceil_mode=False):
        super(_MaxPoolNd, self).__init__()
        self.kernel_size = kernel_size
        # A falsy stride (None) falls back to the kernel size, which is the
        # documented default of the concrete subclasses.
        self.stride = stride or kernel_size
        self.padding = padding
        self.dilation = dilation
        self.return_indices = return_indices
        self.ceil_mode = ceil_mode

    def extra_repr(self):
        # Rendered by Module.__repr__ between the parentheses.
        template = ('kernel_size={kernel_size}, stride={stride}, '
                    'padding={padding}, dilation={dilation}, '
                    'ceil_mode={ceil_mode}')
        return template.format(**self.__dict__)
27 
28 
29 @weak_module
class MaxPool1d(_MaxPoolNd):
    r"""Applies a 1D max pooling over an input signal composed of several input
    planes.

    In the simplest case, the output value of the layer with input size :math:`(N, C, L)`
    and output :math:`(N, C, L_{out})` can be precisely described as:

    .. math::
        out(N_i, C_j, k) = \max_{m=0, \ldots, \text{kernel\_size} - 1}
                input(N_i, C_j, stride \times k + m)

    If :attr:`padding` is non-zero, then the input is implicitly zero-padded on both sides
    for :attr:`padding` number of points. :attr:`dilation` controls the spacing between the
    kernel points. It is harder to describe, but this `link`_ has a nice visualization of
    what :attr:`dilation` does.

    Args:
        kernel_size: the size of the window to take a max over
        stride: the stride of the window. Default value is :attr:`kernel_size`
        padding: implicit zero padding to be added on both sides
        dilation: a parameter that controls the stride of elements in the window
        return_indices: if ``True``, will return the max indices along with the outputs.
            Useful for :class:`torch.nn.MaxUnpool1d` later
        ceil_mode: when True, will use `ceil` instead of `floor` to compute the output shape

    Shape:
        - Input: :math:`(N, C, L_{in})`
        - Output: :math:`(N, C, L_{out})`, where

          .. math::
              L_{out} = \left\lfloor \frac{L_{in} + 2 \times \text{padding} - \text{dilation}
                    \times (\text{kernel\_size} - 1) - 1}{\text{stride}} + 1\right\rfloor

    Examples::

        >>> # pool of size=3, stride=2
        >>> m = nn.MaxPool1d(3, stride=2)
        >>> input = torch.randn(20, 16, 50)
        >>> output = m(input)

    .. _link:
        https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md
    """

    @weak_script_method
    def forward(self, input):
        # All hyper-parameters were stored by _MaxPoolNd.__init__;
        # extra_repr is inherited unchanged from the base class.
        return F.max_pool1d(input, self.kernel_size, self.stride,
                            self.padding, self.dilation, self.ceil_mode,
                            self.return_indices)
82 
83 
84 @weak_module
class MaxPool2d(_MaxPoolNd):
    r"""Applies a 2D max pooling over an input signal composed of several input
    planes.

    In the simplest case, the output value of the layer with input size :math:`(N, C, H, W)`,
    output :math:`(N, C, H_{out}, W_{out})` and :attr:`kernel_size` :math:`(kH, kW)`
    can be precisely described as:

    .. math::
        \begin{aligned}
            out(N_i, C_j, h, w) ={} & \max_{m=0, \ldots, kH-1} \max_{n=0, \ldots, kW-1} \\
                                    & \text{input}(N_i, C_j, \text{stride[0]} \times h + m,
                                                   \text{stride[1]} \times w + n)
        \end{aligned}

    If :attr:`padding` is non-zero, then the input is implicitly zero-padded on both sides
    for :attr:`padding` number of points. :attr:`dilation` controls the spacing between the
    kernel points. It is harder to describe, but this `link`_ has a nice visualization of
    what :attr:`dilation` does.

    The parameters :attr:`kernel_size`, :attr:`stride`, :attr:`padding`, :attr:`dilation` can either be:

        - a single ``int`` -- in which case the same value is used for the height and width dimension
        - a ``tuple`` of two ints -- in which case, the first `int` is used for the height dimension,
          and the second `int` for the width dimension

    Args:
        kernel_size: the size of the window to take a max over
        stride: the stride of the window. Default value is :attr:`kernel_size`
        padding: implicit zero padding to be added on both sides
        dilation: a parameter that controls the stride of elements in the window
        return_indices: if ``True``, will return the max indices along with the outputs.
            Useful for :class:`torch.nn.MaxUnpool2d` later
        ceil_mode: when True, will use `ceil` instead of `floor` to compute the output shape

    Shape:
        - Input: :math:`(N, C, H_{in}, W_{in})`
        - Output: :math:`(N, C, H_{out}, W_{out})`, where

          .. math::
              H_{out} = \left\lfloor\frac{H_{in} + 2 * \text{padding[0]} - \text{dilation[0]}
                    \times (\text{kernel\_size[0]} - 1) - 1}{\text{stride[0]}} + 1\right\rfloor

          .. math::
              W_{out} = \left\lfloor\frac{W_{in} + 2 * \text{padding[1]} - \text{dilation[1]}
                    \times (\text{kernel\_size[1]} - 1) - 1}{\text{stride[1]}} + 1\right\rfloor

    Examples::

        >>> # pool of square window of size=3, stride=2
        >>> m = nn.MaxPool2d(3, stride=2)
        >>> # pool of non-square window
        >>> m = nn.MaxPool2d((3, 2), stride=(2, 1))
        >>> input = torch.randn(20, 16, 50, 32)
        >>> output = m(input)

    .. _link:
        https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md
    """

    @weak_script_method
    def forward(self, input):
        # Hyper-parameters come from _MaxPoolNd.__init__.
        return F.max_pool2d(input, self.kernel_size, self.stride,
                            self.padding, self.dilation, self.ceil_mode,
                            self.return_indices)
149 
150 
151 @weak_module
class MaxPool3d(_MaxPoolNd):
    r"""Applies a 3D max pooling over an input signal composed of several input
    planes.

    In the simplest case, the output value of the layer with input size :math:`(N, C, D, H, W)`,
    output :math:`(N, C, D_{out}, H_{out}, W_{out})` and :attr:`kernel_size` :math:`(kD, kH, kW)`
    can be precisely described as:

    .. math::
        \begin{aligned}
            \text{out}(N_i, C_j, d, h, w) ={} & \max_{k=0, \ldots, kD-1} \max_{m=0, \ldots, kH-1} \max_{n=0, \ldots, kW-1} \\
                                              & \text{input}(N_i, C_j, \text{stride[0]} \times d + k,
                                                             \text{stride[1]} \times h + m, \text{stride[2]} \times w + n)
        \end{aligned}

    If :attr:`padding` is non-zero, then the input is implicitly zero-padded on both sides
    for :attr:`padding` number of points. :attr:`dilation` controls the spacing between the
    kernel points. It is harder to describe, but this `link`_ has a nice visualization of
    what :attr:`dilation` does.

    The parameters :attr:`kernel_size`, :attr:`stride`, :attr:`padding`, :attr:`dilation` can either be:

        - a single ``int`` -- in which case the same value is used for the depth, height and width dimension
        - a ``tuple`` of three ints -- in which case, the first `int` is used for the depth dimension,
          the second `int` for the height dimension and the third `int` for the width dimension

    Args:
        kernel_size: the size of the window to take a max over
        stride: the stride of the window. Default value is :attr:`kernel_size`
        padding: implicit zero padding to be added on all three sides
        dilation: a parameter that controls the stride of elements in the window
        return_indices: if ``True``, will return the max indices along with the outputs.
            Useful for :class:`torch.nn.MaxUnpool3d` later
        ceil_mode: when True, will use `ceil` instead of `floor` to compute the output shape

    Shape:
        - Input: :math:`(N, C, D_{in}, H_{in}, W_{in})`
        - Output: :math:`(N, C, D_{out}, H_{out}, W_{out})`, where

          .. math::
              D_{out} = \left\lfloor\frac{D_{in} + 2 \times \text{padding}[0] - \text{dilation}[0] \times
                (\text{kernel\_size}[0] - 1) - 1}{\text{stride}[0]} + 1\right\rfloor

          .. math::
              H_{out} = \left\lfloor\frac{H_{in} + 2 \times \text{padding}[1] - \text{dilation}[1] \times
                (\text{kernel\_size}[1] - 1) - 1}{\text{stride}[1]} + 1\right\rfloor

          .. math::
              W_{out} = \left\lfloor\frac{W_{in} + 2 \times \text{padding}[2] - \text{dilation}[2] \times
                (\text{kernel\_size}[2] - 1) - 1}{\text{stride}[2]} + 1\right\rfloor

    Examples::

        >>> # pool of square window of size=3, stride=2
        >>> m = nn.MaxPool3d(3, stride=2)
        >>> # pool of non-square window
        >>> m = nn.MaxPool3d((3, 2, 2), stride=(2, 1, 2))
        >>> input = torch.randn(20, 16, 50,44, 31)
        >>> output = m(input)

    .. _link:
        https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md
    """  # noqa: E501

    @weak_script_method
    def forward(self, input):
        # Hyper-parameters come from _MaxPoolNd.__init__.
        return F.max_pool3d(input, self.kernel_size, self.stride,
                            self.padding, self.dilation, self.ceil_mode,
                            self.return_indices)
220 
221 
222 @weak_module
class _MaxUnpoolNd(Module):
    """Shared base for the MaxUnpool1d/2d/3d modules; supplies their repr."""

    def extra_repr(self):
        # Shown inside Module.__repr__ parentheses.
        parts = (self.kernel_size, self.stride, self.padding)
        return 'kernel_size={}, stride={}, padding={}'.format(*parts)
229 
230 
231 @weak_module
class MaxUnpool1d(_MaxUnpoolNd):
    r"""Computes a partial inverse of :class:`MaxPool1d`.

    :class:`MaxPool1d` is not fully invertible, since the non-maximal values are lost.

    :class:`MaxUnpool1d` takes in as input the output of :class:`MaxPool1d`
    including the indices of the maximal values and computes a partial inverse
    in which all non-maximal values are set to zero.

    .. note:: :class:`MaxPool1d` can map several input sizes to the same output
              sizes. Hence, the inversion process can get ambiguous.
              To accommodate this, you can provide the needed output size
              as an additional argument :attr:`output_size` in the forward call.
              See the Inputs and Example below.

    Args:
        kernel_size (int or tuple): Size of the max pooling window.
        stride (int or tuple): Stride of the max pooling window.
            It is set to :attr:`kernel_size` by default.
        padding (int or tuple): Padding that was added to the input

    Inputs:
        - `input`: the input Tensor to invert
        - `indices`: the indices given out by :class:`~torch.nn.MaxPool1d`
        - `output_size` (optional): the targeted output size

    Shape:
        - Input: :math:`(N, C, H_{in})`
        - Output: :math:`(N, C, H_{out})`, where

          .. math::
              H_{out} = (H_{in} - 1) \times \text{stride}[0] - 2 \times \text{padding}[0] + \text{kernel\_size}[0]

          or as given by :attr:`output_size` in the call operator

    Example::

        >>> pool = nn.MaxPool1d(2, stride=2, return_indices=True)
        >>> unpool = nn.MaxUnpool1d(2, stride=2)
        >>> input = torch.tensor([[[1., 2, 3, 4, 5, 6, 7, 8]]])
        >>> output, indices = pool(input)
        >>> unpool(output, indices)
        tensor([[[ 0.,  2.,  0.,  4.,  0.,  6.,  0., 8.]]])

        >>> # Example showcasing the use of output_size
        >>> input = torch.tensor([[[1., 2, 3, 4, 5, 6, 7, 8, 9]]])
        >>> output, indices = pool(input)
        >>> unpool(output, indices, output_size=input.size())
        tensor([[[ 0.,  2.,  0.,  4.,  0.,  6.,  0., 8.,  0.]]])

        >>> unpool(output, indices)
        tensor([[[ 0.,  2.,  0.,  4.,  0.,  6.,  0., 8.]]])
    """

    def __init__(self, kernel_size, stride=None, padding=0):
        super(MaxUnpool1d, self).__init__()
        # Normalize every hyper-parameter to a 1-tuple; a falsy stride
        # (None) falls back to the kernel size.
        self.kernel_size = _single(kernel_size)
        self.stride = _single(stride or kernel_size)
        self.padding = _single(padding)

    def forward(self, input, indices, output_size=None):
        return F.max_unpool1d(input, indices, self.kernel_size, self.stride,
                              self.padding, output_size)
295 
296 
297 @weak_module
class MaxUnpool2d(_MaxUnpoolNd):
    r"""Computes a partial inverse of :class:`MaxPool2d`.

    :class:`MaxPool2d` is not fully invertible, since the non-maximal values are lost.

    :class:`MaxUnpool2d` takes in as input the output of :class:`MaxPool2d`
    including the indices of the maximal values and computes a partial inverse
    in which all non-maximal values are set to zero.

    .. note:: :class:`MaxPool2d` can map several input sizes to the same output
              sizes. Hence, the inversion process can get ambiguous.
              To accommodate this, you can provide the needed output size
              as an additional argument :attr:`output_size` in the forward call.
              See the Inputs and Example below.

    Args:
        kernel_size (int or tuple): Size of the max pooling window.
        stride (int or tuple): Stride of the max pooling window.
            It is set to :attr:`kernel_size` by default.
        padding (int or tuple): Padding that was added to the input

    Inputs:
        - `input`: the input Tensor to invert
        - `indices`: the indices given out by :class:`~torch.nn.MaxPool2d`
        - `output_size` (optional): the targeted output size

    Shape:
        - Input: :math:`(N, C, H_{in}, W_{in})`
        - Output: :math:`(N, C, H_{out}, W_{out})`, where

          .. math::
            H_{out} = (H_{in} - 1) \times \text{stride[0]} - 2 \times \text{padding[0]} + \text{kernel\_size[0]}

          .. math::
            W_{out} = (W_{in} - 1) \times \text{stride[1]} - 2 \times \text{padding[1]} + \text{kernel\_size[1]}

          or as given by :attr:`output_size` in the call operator

    Example::

        >>> pool = nn.MaxPool2d(2, stride=2, return_indices=True)
        >>> unpool = nn.MaxUnpool2d(2, stride=2)
        >>> input = torch.tensor([[[[ 1.,  2,  3,  4],
                                    [ 5,  6,  7,  8],
                                    [ 9, 10, 11, 12],
                                    [13, 14, 15, 16]]]])
        >>> output, indices = pool(input)
        >>> unpool(output, indices)
        tensor([[[[  0.,   0.,   0.,   0.],
                  [  0.,   6.,   0.,   8.],
                  [  0.,   0.,   0.,   0.],
                  [  0.,  14.,   0.,  16.]]]])

        >>> # specify a different output size than input size
        >>> unpool(output, indices, output_size=torch.Size([1, 1, 5, 5]))
        tensor([[[[  0.,   0.,   0.,   0.,   0.],
                  [  6.,   0.,   8.,   0.,   0.],
                  [  0.,   0.,   0.,  14.,   0.],
                  [ 16.,   0.,   0.,   0.,   0.],
                  [  0.,   0.,   0.,   0.,   0.]]]])
    """

    def __init__(self, kernel_size, stride=None, padding=0):
        super(MaxUnpool2d, self).__init__()
        # Normalize every hyper-parameter to a 2-tuple; a falsy stride
        # (None) falls back to the kernel size.
        self.kernel_size = _pair(kernel_size)
        self.stride = _pair(stride or kernel_size)
        self.padding = _pair(padding)

    def forward(self, input, indices, output_size=None):
        return F.max_unpool2d(input, indices, self.kernel_size, self.stride,
                              self.padding, output_size)
369 
370 
371 @weak_module
class MaxUnpool3d(_MaxUnpoolNd):
    r"""Computes a partial inverse of :class:`MaxPool3d`.

    :class:`MaxPool3d` is not fully invertible, since the non-maximal values are lost.
    :class:`MaxUnpool3d` takes in as input the output of :class:`MaxPool3d`
    including the indices of the maximal values and computes a partial inverse
    in which all non-maximal values are set to zero.

    .. note:: :class:`MaxPool3d` can map several input sizes to the same output
              sizes. Hence, the inversion process can get ambiguous.
              To accommodate this, you can provide the needed output size
              as an additional argument :attr:`output_size` in the forward call.
              See the Inputs section below.

    Args:
        kernel_size (int or tuple): Size of the max pooling window.
        stride (int or tuple): Stride of the max pooling window.
            It is set to :attr:`kernel_size` by default.
        padding (int or tuple): Padding that was added to the input

    Inputs:
        - `input`: the input Tensor to invert
        - `indices`: the indices given out by :class:`~torch.nn.MaxPool3d`
        - `output_size` (optional): the targeted output size

    Shape:
        - Input: :math:`(N, C, D_{in}, H_{in}, W_{in})`
        - Output: :math:`(N, C, D_{out}, H_{out}, W_{out})`, where

          .. math::
              D_{out} = (D_{in} - 1) \times \text{stride[0]} - 2 \times \text{padding[0]} + \text{kernel\_size[0]}

          .. math::
              H_{out} = (H_{in} - 1) \times \text{stride[1]} - 2 \times \text{padding[1]} + \text{kernel\_size[1]}

          .. math::
              W_{out} = (W_{in} - 1) \times \text{stride[2]} - 2 \times \text{padding[2]} + \text{kernel\_size[2]}

          or as given by :attr:`output_size` in the call operator

    Example::

        >>> # pool of square window of size=3, stride=2
        >>> pool = nn.MaxPool3d(3, stride=2, return_indices=True)
        >>> unpool = nn.MaxUnpool3d(3, stride=2)
        >>> output, indices = pool(torch.randn(20, 16, 51, 33, 15))
        >>> unpooled_output = unpool(output, indices)
        >>> unpooled_output.size()
        torch.Size([20, 16, 51, 33, 15])
    """

    def __init__(self, kernel_size, stride=None, padding=0):
        super(MaxUnpool3d, self).__init__()
        # Normalize every hyper-parameter to a 3-tuple; a falsy stride
        # (None) falls back to the kernel size.
        self.kernel_size = _triple(kernel_size)
        self.stride = _triple(stride or kernel_size)
        self.padding = _triple(padding)

    def forward(self, input, indices, output_size=None):
        return F.max_unpool3d(input, indices, self.kernel_size, self.stride,
                              self.padding, output_size)
432 
433 
434 @weak_module
class _AvgPoolNd(Module):
    """Shared base for the AvgPool1d/2d/3d modules: declares the scripting
    constants and supplies their repr."""

    __constants__ = ['kernel_size', 'stride', 'padding', 'ceil_mode',
                     'count_include_pad']

    def extra_repr(self):
        # Shown inside Module.__repr__ parentheses.
        parts = (self.kernel_size, self.stride, self.padding)
        return 'kernel_size={}, stride={}, padding={}'.format(*parts)
442 
443 
444 @weak_module
class AvgPool1d(_AvgPoolNd):
    r"""Applies a 1D average pooling over an input signal composed of several
    input planes.

    In the simplest case, the output value of the layer with input size :math:`(N, C, L)`,
    output :math:`(N, C, L_{out})` and :attr:`kernel_size` :math:`k`
    can be precisely described as:

    .. math::

        \text{out}(N_i, C_j, l) = \frac{1}{k} \sum_{m=0}^{k-1}
                               \text{input}(N_i, C_j, \text{stride} \times l + m)

    If :attr:`padding` is non-zero, then the input is implicitly zero-padded on both sides
    for :attr:`padding` number of points.

    The parameters :attr:`kernel_size`, :attr:`stride`, :attr:`padding` can each be
    an ``int`` or a one-element tuple.

    Args:
        kernel_size: the size of the window
        stride: the stride of the window. Default value is :attr:`kernel_size`
        padding: implicit zero padding to be added on both sides
        ceil_mode: when True, will use `ceil` instead of `floor` to compute the output shape
        count_include_pad: when True, will include the zero-padding in the averaging calculation

    Shape:
        - Input: :math:`(N, C, L_{in})`
        - Output: :math:`(N, C, L_{out})`, where

          .. math::
              L_{out} = \left\lfloor \frac{L_{in} +
              2 \times \text{padding} - \text{kernel\_size}}{\text{stride}} + 1\right\rfloor

    Examples::

        >>> # pool with window of size=3, stride=2
        >>> m = nn.AvgPool1d(3, stride=2)
        >>> m(torch.tensor([[[1.,2,3,4,5,6,7]]]))
        tensor([[[ 2.,  4.,  6.]]])
    """

    def __init__(self, kernel_size, stride=None, padding=0, ceil_mode=False,
                 count_include_pad=True):
        super(AvgPool1d, self).__init__()
        # Normalize to 1-tuples; an explicit `is not None` check (rather
        # than `or`) keeps a stride of 0 from silently becoming kernel_size.
        self.kernel_size = _single(kernel_size)
        self.stride = _single(stride if stride is not None else kernel_size)
        self.padding = _single(padding)
        self.ceil_mode = ceil_mode
        self.count_include_pad = count_include_pad

    @weak_script_method
    def forward(self, input):
        return F.avg_pool1d(
            input, self.kernel_size, self.stride, self.padding, self.ceil_mode,
            self.count_include_pad)
500 
501 
502 @weak_module
class AvgPool2d(_AvgPoolNd):
    r"""Applies a 2D average pooling over an input signal composed of several input
    planes.

    In the simplest case, the output value of the layer with input size :math:`(N, C, H, W)`,
    output :math:`(N, C, H_{out}, W_{out})` and :attr:`kernel_size` :math:`(kH, kW)`
    can be precisely described as:

    .. math::

        out(N_i, C_j, h, w)  = \frac{1}{kH * kW} \sum_{m=0}^{kH-1} \sum_{n=0}^{kW-1}
                               input(N_i, C_j, stride[0] \times h + m, stride[1] \times w + n)

    If :attr:`padding` is non-zero, then the input is implicitly zero-padded on both sides
    for :attr:`padding` number of points.

    The parameters :attr:`kernel_size`, :attr:`stride`, :attr:`padding` can either be:

        - a single ``int`` -- in which case the same value is used for the height and width dimension
        - a ``tuple`` of two ints -- in which case, the first `int` is used for the height dimension,
          and the second `int` for the width dimension

    Args:
        kernel_size: the size of the window
        stride: the stride of the window. Default value is :attr:`kernel_size`
        padding: implicit zero padding to be added on both sides
        ceil_mode: when True, will use `ceil` instead of `floor` to compute the output shape
        count_include_pad: when True, will include the zero-padding in the averaging calculation

    Shape:
        - Input: :math:`(N, C, H_{in}, W_{in})`
        - Output: :math:`(N, C, H_{out}, W_{out})`, where

          .. math::
              H_{out} = \left\lfloor\frac{H_{in}  + 2 \times \text{padding}[0] -
                \text{kernel\_size}[0]}{\text{stride}[0]} + 1\right\rfloor

          .. math::
              W_{out} = \left\lfloor\frac{W_{in}  + 2 \times \text{padding}[1] -
                \text{kernel\_size}[1]}{\text{stride}[1]} + 1\right\rfloor

    Examples::

        >>> # pool of square window of size=3, stride=2
        >>> m = nn.AvgPool2d(3, stride=2)
        >>> # pool of non-square window
        >>> m = nn.AvgPool2d((3, 2), stride=(2, 1))
        >>> input = torch.randn(20, 16, 50, 32)
        >>> output = m(input)
    """

    def __init__(self, kernel_size, stride=None, padding=0, ceil_mode=False,
                 count_include_pad=True):
        super(AvgPool2d, self).__init__()
        # Values are stored as given (int or tuple); normalization is left
        # to the functional layer. `is not None` matches AvgPool1d's
        # handling instead of the falsy `stride or kernel_size` shortcut.
        self.kernel_size = kernel_size
        self.stride = stride if stride is not None else kernel_size
        self.padding = padding
        self.ceil_mode = ceil_mode
        self.count_include_pad = count_include_pad

    @weak_script_method
    def forward(self, input):
        return F.avg_pool2d(input, self.kernel_size, self.stride,
                            self.padding, self.ceil_mode, self.count_include_pad)
566 
567 
568 @weak_module
class AvgPool3d(_AvgPoolNd):
    r"""Applies a 3D average pooling over an input signal composed of several input
    planes.

    In the simplest case, the output value of the layer with input size :math:`(N, C, D, H, W)`,
    output :math:`(N, C, D_{out}, H_{out}, W_{out})` and :attr:`kernel_size` :math:`(kD, kH, kW)`
    can be precisely described as:

    .. math::
        \begin{aligned}
            \text{out}(N_i, C_j, d, h, w) ={} & \sum_{k=0}^{kD-1} \sum_{m=0}^{kH-1} \sum_{n=0}^{kW-1} \\
                                              & \frac{\text{input}(N_i, C_j, \text{stride}[0] \times d + k,
                                                      \text{stride}[1] \times h + m, \text{stride}[2] \times w + n)}
                                                     {kD \times kH \times kW}
        \end{aligned}

    If :attr:`padding` is non-zero, then the input is implicitly zero-padded on all three sides
    for :attr:`padding` number of points.

    The parameters :attr:`kernel_size`, :attr:`stride` can either be:

        - a single ``int`` -- in which case the same value is used for the depth, height and width dimension
        - a ``tuple`` of three ints -- in which case, the first `int` is used for the depth dimension,
          the second `int` for the height dimension and the third `int` for the width dimension

    Args:
        kernel_size: the size of the window
        stride: the stride of the window. Default value is :attr:`kernel_size`
        padding: implicit zero padding to be added on all three sides
        ceil_mode: when True, will use `ceil` instead of `floor` to compute the output shape
        count_include_pad: when True, will include the zero-padding in the averaging calculation

    Shape:
        - Input: :math:`(N, C, D_{in}, H_{in}, W_{in})`
        - Output: :math:`(N, C, D_{out}, H_{out}, W_{out})`, where

          .. math::
              D_{out} = \left\lfloor\frac{D_{in} + 2 \times \text{padding}[0] -
                    \text{kernel\_size}[0]}{\text{stride}[0]} + 1\right\rfloor

          .. math::
              H_{out} = \left\lfloor\frac{H_{in} + 2 \times \text{padding}[1] -
                    \text{kernel\_size}[1]}{\text{stride}[1]} + 1\right\rfloor

          .. math::
              W_{out} = \left\lfloor\frac{W_{in} + 2 \times \text{padding}[2] -
                    \text{kernel\_size}[2]}{\text{stride}[2]} + 1\right\rfloor

    Examples::

        >>> # pool of square window of size=3, stride=2
        >>> m = nn.AvgPool3d(3, stride=2)
        >>> # pool of non-square window
        >>> m = nn.AvgPool3d((3, 2, 2), stride=(2, 1, 2))
        >>> input = torch.randn(20, 16, 50,44, 31)
        >>> output = m(input)
    """

    def __init__(self, kernel_size, stride=None, padding=0, ceil_mode=False,
                 count_include_pad=True):
        super(AvgPool3d, self).__init__()
        # Values are stored as given; `is not None` matches AvgPool1d's
        # handling instead of the falsy `stride or kernel_size` shortcut.
        self.kernel_size = kernel_size
        self.stride = stride if stride is not None else kernel_size
        self.padding = padding
        self.ceil_mode = ceil_mode
        self.count_include_pad = count_include_pad

    @weak_script_method
    def forward(self, input):
        return F.avg_pool3d(input, self.kernel_size, self.stride,
                            self.padding, self.ceil_mode, self.count_include_pad)

    def __setstate__(self, d):
        # Backfill attributes missing from checkpoints pickled by older
        # versions of this class, which lacked these hyper-parameters.
        super(AvgPool3d, self).__setstate__(d)
        self.__dict__.setdefault('padding', 0)
        self.__dict__.setdefault('ceil_mode', False)
        self.__dict__.setdefault('count_include_pad', True)
645 
646 
647 @weak_module
class FractionalMaxPool2d(Module):
    r"""Applies a 2D fractional max pooling over an input signal composed of several input planes.

    Fractional MaxPooling is described in detail in the paper `Fractional MaxPooling`_ by Ben Graham

    The max-pooling operation is applied in :math:`kH \times kW` regions by a stochastic
    step size determined by the target output size.
    The number of output features is equal to the number of input planes.

    Args:
        kernel_size: the size of the window to take a max over.
                     Can be a single number k (for a square kernel of k x k) or a tuple `(kh, kw)`
        output_size: the target output size of the image of the form `oH x oW`.
                     Can be a tuple `(oH, oW)` or a single number oH for a square image `oH x oH`
        output_ratio: If one wants to have an output size as a ratio of the input size, this option can be given.
                      This has to be a number or tuple in the range (0, 1)
        return_indices: if ``True``, will return the indices along with the outputs.
                        Useful to pass to :meth:`nn.MaxUnpool2d`. Default: ``False``

    Examples:
        >>> # pool of square window of size=3, and target output size 13x12
        >>> m = nn.FractionalMaxPool2d(3, output_size=(13, 12))
        >>> # pool of square window and target output size being half of input image size
        >>> m = nn.FractionalMaxPool2d(3, output_ratio=(0.5, 0.5))
        >>> input = torch.randn(20, 16, 50, 32)
        >>> output = m(input)

    .. _Fractional MaxPooling:
        http://arxiv.org/abs/1412.6071
    """
    __constants__ = ['kernel_size', 'return_indices', 'output_size',
                     'output_ratio']

    def __init__(self, kernel_size, output_size=None, output_ratio=None,
                 return_indices=False, _random_samples=None):
        super(FractionalMaxPool2d, self).__init__()
        self.kernel_size = _pair(kernel_size)
        self.return_indices = return_indices
        # _random_samples is registered as a buffer so that, when supplied,
        # it moves with the module (e.g. across devices).
        self.register_buffer('_random_samples', _random_samples)
        self.output_size = None if output_size is None else _pair(output_size)
        self.output_ratio = None if output_ratio is None else _pair(output_ratio)
        # Exactly one of output_size / output_ratio must be given.
        if output_size is None and output_ratio is None:
            raise ValueError("FractionalMaxPool2d requires specifying either "
                             "an output size, or a pooling ratio")
        if output_size is not None and output_ratio is not None:
            raise ValueError("only one of output_size and output_ratio may be specified")
        if self.output_ratio is not None:
            if not all(0 < r < 1 for r in self.output_ratio):
                raise ValueError("output_ratio must be between 0 and 1 (got {})"
                                 .format(output_ratio))

    @weak_script_method
    def forward(self, input):
        return F.fractional_max_pool2d(
            input, self.kernel_size, self.output_size, self.output_ratio,
            self.return_indices,
            _random_samples=self._random_samples)
705 
706 
707 @weak_module
class FractionalMaxPool3d(Module):
    r"""Applies a 3D fractional max pooling over an input signal composed of several input planes.

    Fractional MaxPooling is described in detail in the paper `Fractional MaxPooling`_ by Ben Graham

    The max-pooling operation is applied in :math:`kTxkHxkW` regions by a stochastic
    step size determined by the target output size.
    The number of output features is equal to the number of input planes.

    Args:
        kernel_size: the size of the window to take a max over.
                     Can be a single number k (for a square kernel of k x k x k) or a tuple `(kt x kh x kw)`
        output_size: the target output size of the image of the form `oT x oH x oW`.
                     Can be a tuple `(oT, oH, oW)` or a single number oH for a square image `oH x oH x oH`
        output_ratio: If one wants to have an output size as a ratio of the input size, this option can be given.
                      This has to be a number or tuple in the range (0, 1)
        return_indices: if ``True``, will return the indices along with the outputs.
                        Useful to pass to :meth:`nn.MaxUnpool3d`. Default: ``False``

    Examples:
        >>> # pool of cubic window of size=3, and target output size 13x12x11
        >>> m = nn.FractionalMaxPool3d(3, output_size=(13, 12, 11))
        >>> # pool of cubic window and target output size being half of input size
        >>> m = nn.FractionalMaxPool3d(3, output_ratio=(0.5, 0.5, 0.5))
        >>> input = torch.randn(20, 16, 50, 32, 16)
        >>> output = m(input)

    .. _Fractional MaxPooling:
        http://arxiv.org/abs/1412.6071
    """
    __constants__ = ['kernel_size', 'return_indices', 'output_size',
                     'output_ratio']

    def __init__(self, kernel_size, output_size=None, output_ratio=None,
                 return_indices=False, _random_samples=None):
        super(FractionalMaxPool3d, self).__init__()
        self.kernel_size = _triple(kernel_size)
        self.return_indices = return_indices
        # _random_samples is registered as a buffer so that, when supplied,
        # it moves with the module (e.g. across devices).
        self.register_buffer('_random_samples', _random_samples)
        self.output_size = None if output_size is None else _triple(output_size)
        self.output_ratio = None if output_ratio is None else _triple(output_ratio)
        # Exactly one of output_size / output_ratio must be given.
        if output_size is None and output_ratio is None:
            raise ValueError("FractionalMaxPool3d requires specifying either "
                             "an output size, or a pooling ratio")
        if output_size is not None and output_ratio is not None:
            raise ValueError("only one of output_size and output_ratio may be specified")
        if self.output_ratio is not None:
            if not all(0 < r < 1 for r in self.output_ratio):
                raise ValueError("output_ratio must be between 0 and 1 (got {})"
                                 .format(output_ratio))

    @weak_script_method
    def forward(self, input):
        return F.fractional_max_pool3d(
            input, self.kernel_size, self.output_size, self.output_ratio,
            self.return_indices,
            _random_samples=self._random_samples)
765 
766 
767 @weak_module
class _LPPoolNd(Module):
    """Shared base for the LPPool modules: stores hyper-parameters and
    renders them in ``repr``."""

    __constants__ = ['norm_type', 'kernel_size', 'stride', 'ceil_mode']

    def __init__(self, norm_type, kernel_size, stride=None, ceil_mode=False):
        super(_LPPoolNd, self).__init__()
        self.norm_type = norm_type
        self.kernel_size = kernel_size
        # NOTE(review): unlike _MaxPoolNd, a None stride is stored as-is
        # here — presumably the functional layer supplies the default;
        # confirm against F.lp_pool1d/lp_pool2d.
        self.stride = stride
        self.ceil_mode = ceil_mode

    def extra_repr(self):
        # Rendered by Module.__repr__ between the parentheses.
        template = ('norm_type={norm_type}, kernel_size={kernel_size}, '
                    'stride={stride}, ceil_mode={ceil_mode}')
        return template.format(**self.__dict__)
781 
782 
783 @weak_module
785  r"""Applies a 1D power-average pooling over an input signal composed of several input
786  planes.
787 
788  On each window, the function computed is:
789 
790  .. math::
791  f(X) = \sqrt[p]{\sum_{x \in X} x^{p}}
792 
793  - At p = :math:`\infty`, one gets Max Pooling
794  - At p = 1, one gets Sum Pooling (which is proportional to Average Pooling)
795 
796  .. note:: If the sum to the power of `p` is zero, the gradient of this function is
797  not defined. This implementation will set the gradient to zero in this case.
798 
799  Args:
800  kernel_size: a single int, the size of the window
801  stride: a single int, the stride of the window. Default value is :attr:`kernel_size`
802  ceil_mode: when True, will use `ceil` instead of `floor` to compute the output shape
803 
804  Shape:
805  - Input: :math:`(N, C, L_{in})`
806  - Output: :math:`(N, C, L_{out})`, where
807 
        .. math::
            L_{out} = \left\lfloor\frac{L_{in} - \text{kernel\_size}}{\text{stride}} + 1\right\rfloor
811 
812  Examples::
813  >>> # power-2 pool of window of length 3, with stride 2.
814  >>> m = nn.LPPool1d(2, 3, stride=2)
815  >>> input = torch.randn(20, 16, 50)
816  >>> output = m(input)
817  """
818 
819  @weak_script_method
820  @weak_script_method
821  def forward(self, input):
822  return F.lp_pool1d(input, float(self.norm_type), self.kernel_size,
823  self.stride, self.ceil_mode)
824 
825 
826 @weak_module
828  r"""Applies a 2D power-average pooling over an input signal composed of several input
829  planes.
830 
831  On each window, the function computed is:
832 
833  .. math::
834  f(X) = \sqrt[p]{\sum_{x \in X} x^{p}}
835 
836  - At p = :math:`\infty`, one gets Max Pooling
837  - At p = 1, one gets Sum Pooling (which is proportional to average pooling)
838 
839  The parameters :attr:`kernel_size`, :attr:`stride` can either be:
840 
841  - a single ``int`` -- in which case the same value is used for the height and width dimension
842  - a ``tuple`` of two ints -- in which case, the first `int` is used for the height dimension,
843  and the second `int` for the width dimension
844 
845  .. note:: If the sum to the power of `p` is zero, the gradient of this function is
846  not defined. This implementation will set the gradient to zero in this case.
847 
848  Args:
849  kernel_size: the size of the window
850  stride: the stride of the window. Default value is :attr:`kernel_size`
851  ceil_mode: when True, will use `ceil` instead of `floor` to compute the output shape
852 
853  Shape:
854  - Input: :math:`(N, C, H_{in}, W_{in})`
855  - Output: :math:`(N, C, H_{out}, W_{out})`, where
856 
        .. math::
            H_{out} = \left\lfloor\frac{H_{in} - \text{kernel\_size}[0]}{\text{stride}[0]} + 1\right\rfloor

        .. math::
            W_{out} = \left\lfloor\frac{W_{in} - \text{kernel\_size}[1]}{\text{stride}[1]} + 1\right\rfloor
864 
865  Examples::
866 
867  >>> # power-2 pool of square window of size=3, stride=2
868  >>> m = nn.LPPool2d(2, 3, stride=2)
869  >>> # pool of non-square window of power 1.2
870  >>> m = nn.LPPool2d(1.2, (3, 2), stride=(2, 1))
871  >>> input = torch.randn(20, 16, 50, 32)
872  >>> output = m(input)
873 
874  """
875 
876  @weak_script_method
877  def forward(self, input):
878  return F.lp_pool2d(input, float(self.norm_type), self.kernel_size,
879  self.stride, self.ceil_mode)
880 
881 
@weak_module
class _AdaptiveMaxPoolNd(Module):
    """Shared base for the AdaptiveMaxPool modules: holds the target output
    size and the return-indices flag."""

    __constants__ = ['output_size', 'return_indices']

    def __init__(self, output_size, return_indices=False):
        super(_AdaptiveMaxPoolNd, self).__init__()
        self.return_indices = return_indices
        self.output_size = output_size

    def extra_repr(self):
        return 'output_size={size}'.format(size=self.output_size)
893 
894 # FIXME (by @ssnl): Improve adaptive pooling docs: specify what the input and
895 # output shapes are, and how the operation computes output.
896 
897 
898 @weak_module
900  r"""Applies a 1D adaptive max pooling over an input signal composed of several input planes.
901 
902  The output size is H, for any input size.
903  The number of output features is equal to the number of input planes.
904 
905  Args:
906  output_size: the target output size H
907  return_indices: if ``True``, will return the indices along with the outputs.
908  Useful to pass to nn.MaxUnpool1d. Default: ``False``
909 
910  Examples:
911  >>> # target output size of 5
912  >>> m = nn.AdaptiveMaxPool1d(5)
913  >>> input = torch.randn(1, 64, 8)
914  >>> output = m(input)
915 
916  """
917 
918  @weak_script_method
919  def forward(self, input):
920  return F.adaptive_max_pool1d(input, self.output_size, self.return_indices)
921 
922 
923 @weak_module
925  r"""Applies a 2D adaptive max pooling over an input signal composed of several input planes.
926 
927  The output is of size H x W, for any input size.
928  The number of output features is equal to the number of input planes.
929 
930  Args:
931  output_size: the target output size of the image of the form H x W.
932  Can be a tuple (H, W) or a single H for a square image H x H.
933  H and W can be either a ``int``, or ``None`` which means the size will
934  be the same as that of the input.
935  return_indices: if ``True``, will return the indices along with the outputs.
936  Useful to pass to nn.MaxUnpool2d. Default: ``False``
937 
938  Examples:
939  >>> # target output size of 5x7
940  >>> m = nn.AdaptiveMaxPool2d((5,7))
941  >>> input = torch.randn(1, 64, 8, 9)
942  >>> output = m(input)
943  >>> # target output size of 7x7 (square)
944  >>> m = nn.AdaptiveMaxPool2d(7)
945  >>> input = torch.randn(1, 64, 10, 9)
946  >>> output = m(input)
947  >>> # target output size of 10x7
948  >>> m = nn.AdaptiveMaxPool2d((None, 7))
949  >>> input = torch.randn(1, 64, 10, 9)
950  >>> output = m(input)
951 
952  """
953 
954  @weak_script_method
955  def forward(self, input):
956  return F.adaptive_max_pool2d(input, self.output_size, self.return_indices)
957 
958 
959 @weak_module
961  r"""Applies a 3D adaptive max pooling over an input signal composed of several input planes.
962 
963  The output is of size D x H x W, for any input size.
964  The number of output features is equal to the number of input planes.
965 
966  Args:
967  output_size: the target output size of the image of the form D x H x W.
968  Can be a tuple (D, H, W) or a single D for a cube D x D x D.
969  D, H and W can be either a ``int``, or ``None`` which means the size will
970  be the same as that of the input.
971 
972  return_indices: if ``True``, will return the indices along with the outputs.
973  Useful to pass to nn.MaxUnpool3d. Default: ``False``
974 
975  Examples:
976  >>> # target output size of 5x7x9
977  >>> m = nn.AdaptiveMaxPool3d((5,7,9))
978  >>> input = torch.randn(1, 64, 8, 9, 10)
979  >>> output = m(input)
980  >>> # target output size of 7x7x7 (cube)
981  >>> m = nn.AdaptiveMaxPool3d(7)
982  >>> input = torch.randn(1, 64, 10, 9, 8)
983  >>> output = m(input)
984  >>> # target output size of 7x9x8
985  >>> m = nn.AdaptiveMaxPool3d((7, None, None))
986  >>> input = torch.randn(1, 64, 10, 9, 8)
987  >>> output = m(input)
988 
989  """
990 
991  @weak_script_method
992  def forward(self, input):
993  return F.adaptive_max_pool3d(input, self.output_size, self.return_indices)
994 
995 
@weak_module
class _AdaptiveAvgPoolNd(Module):
    """Shared base for the AdaptiveAvgPool modules: holds the target output
    size."""

    __constants__ = ['output_size']

    def __init__(self, output_size):
        super(_AdaptiveAvgPoolNd, self).__init__()
        self.output_size = output_size

    def extra_repr(self):
        return 'output_size={size}'.format(size=self.output_size)
1006 
1007 
1008 @weak_module
1010  r"""Applies a 1D adaptive average pooling over an input signal composed of several input planes.
1011 
1012  The output size is H, for any input size.
1013  The number of output features is equal to the number of input planes.
1014 
1015  Args:
1016  output_size: the target output size H
1017 
1018  Examples:
1019  >>> # target output size of 5
1020  >>> m = nn.AdaptiveAvgPool1d(5)
1021  >>> input = torch.randn(1, 64, 8)
1022  >>> output = m(input)
1023 
1024  """
1025 
1026  @weak_script_method
1027  def forward(self, input):
1028  return F.adaptive_avg_pool1d(input, self.output_size)
1029 
1030 
1031 @weak_module
1033  r"""Applies a 2D adaptive average pooling over an input signal composed of several input planes.
1034 
1035  The output is of size H x W, for any input size.
1036  The number of output features is equal to the number of input planes.
1037 
1038  Args:
1039  output_size: the target output size of the image of the form H x W.
1040  Can be a tuple (H, W) or a single H for a square image H x H.
1041  H and W can be either a ``int``, or ``None`` which means the size will
1042  be the same as that of the input.
1043 
1044  Examples:
1045  >>> # target output size of 5x7
1046  >>> m = nn.AdaptiveAvgPool2d((5,7))
1047  >>> input = torch.randn(1, 64, 8, 9)
1048  >>> output = m(input)
1049  >>> # target output size of 7x7 (square)
1050  >>> m = nn.AdaptiveAvgPool2d(7)
1051  >>> input = torch.randn(1, 64, 10, 9)
1052  >>> output = m(input)
1053  >>> # target output size of 10x7
        >>> m = nn.AdaptiveAvgPool2d((None, 7))
1055  >>> input = torch.randn(1, 64, 10, 9)
1056  >>> output = m(input)
1057 
1058  """
1059 
1060  @weak_script_method
1061  def forward(self, input):
1062  return F.adaptive_avg_pool2d(input, self.output_size)
1063 
1064 
1065 @weak_module
1067  r"""Applies a 3D adaptive average pooling over an input signal composed of several input planes.
1068 
1069  The output is of size D x H x W, for any input size.
1070  The number of output features is equal to the number of input planes.
1071 
1072  Args:
1073  output_size: the target output size of the form D x H x W.
1074  Can be a tuple (D, H, W) or a single number D for a cube D x D x D.
1075  D, H and W can be either a ``int``, or ``None`` which means the size will
1076  be the same as that of the input.
1077 
1078  Examples:
1079  >>> # target output size of 5x7x9
1080  >>> m = nn.AdaptiveAvgPool3d((5,7,9))
1081  >>> input = torch.randn(1, 64, 8, 9, 10)
1082  >>> output = m(input)
1083  >>> # target output size of 7x7x7 (cube)
1084  >>> m = nn.AdaptiveAvgPool3d(7)
1085  >>> input = torch.randn(1, 64, 10, 9, 8)
1086  >>> output = m(input)
1087  >>> # target output size of 7x9x8
        >>> m = nn.AdaptiveAvgPool3d((7, None, None))
1089  >>> input = torch.randn(1, 64, 10, 9, 8)
1090  >>> output = m(input)
1091 
1092  """
1093 
1094  @weak_script_method
1095  def forward(self, input):
1096  return F.adaptive_avg_pool3d(input, self.output_size)