pooling.py
import torch

from .module import Module
from .utils import _single, _pair, _triple
from .. import functional as F
from ..._jit_internal import weak_module, weak_script_method


@weak_module
class _MaxPoolNd(Module):
    __constants__ = ['kernel_size', 'stride', 'padding', 'dilation',
                     'return_indices', 'ceil_mode']

    def __init__(self, kernel_size, stride=None, padding=0, dilation=1,
                 return_indices=False, ceil_mode=False):
        super(_MaxPoolNd, self).__init__()
        self.kernel_size = kernel_size
        self.stride = stride or kernel_size
        self.padding = padding
        self.dilation = dilation
        self.return_indices = return_indices
        self.ceil_mode = ceil_mode

    def extra_repr(self):
        return 'kernel_size={kernel_size}, stride={stride}, padding={padding}' \
            ', dilation={dilation}, ceil_mode={ceil_mode}'.format(**self.__dict__)

@weak_module
class MaxPool1d(_MaxPoolNd):
    r"""Applies a 1D max pooling over an input signal composed of several input
    planes.

    In the simplest case, the output value of the layer with input size :math:`(N, C, L)`
    and output :math:`(N, C, L_{out})` can be precisely described as:

    .. math::
        out(N_i, C_j, k) = \max_{m=0, \ldots, \text{kernel\_size} - 1}
                input(N_i, C_j, stride \times k + m)

    If :attr:`padding` is non-zero, then the input is implicitly zero-padded on both sides
    for :attr:`padding` number of points. :attr:`dilation` controls the spacing between the kernel points.
    It is harder to describe, but this `link`_ has a nice visualization of what :attr:`dilation` does.

    Args:
        kernel_size: the size of the window to take a max over
        stride: the stride of the window. Default value is :attr:`kernel_size`
        padding: implicit zero padding to be added on both sides
        dilation: a parameter that controls the stride of elements in the window
        return_indices: if ``True``, will return the max indices along with the outputs.
                        Useful for :class:`torch.nn.MaxUnpool1d` later
        ceil_mode: when True, will use `ceil` instead of `floor` to compute the output shape

    Shape:
        - Input: :math:`(N, C, L_{in})`
        - Output: :math:`(N, C, L_{out})`, where

          .. math::
              L_{out} = \left\lfloor \frac{L_{in} + 2 \times \text{padding} - \text{dilation}
                    \times (\text{kernel\_size} - 1) - 1}{\text{stride}} + 1\right\rfloor

    Examples::

        >>> # pool of size=3, stride=2
        >>> m = nn.MaxPool1d(3, stride=2)
        >>> input = torch.randn(20, 16, 50)
        >>> output = m(input)

    .. _link:
        https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md
    """

    @weak_script_method
    def forward(self, input):
        return F.max_pool1d(input, self.kernel_size, self.stride,
                            self.padding, self.dilation, self.ceil_mode,
                            self.return_indices)

    def extra_repr(self):
        return 'kernel_size={kernel_size}, stride={stride}, padding={padding}' \
            ', dilation={dilation}, ceil_mode={ceil_mode}'.format(**self.__dict__)

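# A hedged sketch (editor's addition, not part of the upstream file): the
# helper below is hypothetical and only checks the L_out formula documented
# above for one concrete MaxPool1d configuration.
def _maxpool1d_shape_sketch():
    import torch
    from torch import nn

    m = nn.MaxPool1d(kernel_size=3, stride=2, padding=1, dilation=2)
    out = m(torch.randn(1, 1, 50))
    # L_out = floor((50 + 2*1 - 2*(3 - 1) - 1) / 2 + 1) = 24
    assert out.shape == (1, 1, 24)
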
@weak_module
class MaxPool2d(_MaxPoolNd):
    r"""Applies a 2D max pooling over an input signal composed of several input
    planes.

    In the simplest case, the output value of the layer with input size :math:`(N, C, H, W)`,
    output :math:`(N, C, H_{out}, W_{out})` and :attr:`kernel_size` :math:`(kH, kW)`
    can be precisely described as:

    .. math::
        \begin{aligned}
            out(N_i, C_j, h, w) ={} & \max_{m=0, \ldots, kH-1} \max_{n=0, \ldots, kW-1} \\
                                    & \text{input}(N_i, C_j, \text{stride[0]} \times h + m,
                                                   \text{stride[1]} \times w + n)
        \end{aligned}

    If :attr:`padding` is non-zero, then the input is implicitly zero-padded on both sides
    for :attr:`padding` number of points. :attr:`dilation` controls the spacing between the kernel points.
    It is harder to describe, but this `link`_ has a nice visualization of what :attr:`dilation` does.

    The parameters :attr:`kernel_size`, :attr:`stride`, :attr:`padding`, :attr:`dilation` can either be:

        - a single ``int`` -- in which case the same value is used for the height and width dimension
        - a ``tuple`` of two ints -- in which case, the first `int` is used for the height dimension,
          and the second `int` for the width dimension

    Args:
        kernel_size: the size of the window to take a max over
        stride: the stride of the window. Default value is :attr:`kernel_size`
        padding: implicit zero padding to be added on both sides
        dilation: a parameter that controls the stride of elements in the window
        return_indices: if ``True``, will return the max indices along with the outputs.
                        Useful for :class:`torch.nn.MaxUnpool2d` later
        ceil_mode: when True, will use `ceil` instead of `floor` to compute the output shape

    Shape:
        - Input: :math:`(N, C, H_{in}, W_{in})`
        - Output: :math:`(N, C, H_{out}, W_{out})`, where

          .. math::
              H_{out} = \left\lfloor\frac{H_{in} + 2 \times \text{padding[0]} - \text{dilation[0]}
                    \times (\text{kernel\_size[0]} - 1) - 1}{\text{stride[0]}} + 1\right\rfloor

          .. math::
              W_{out} = \left\lfloor\frac{W_{in} + 2 \times \text{padding[1]} - \text{dilation[1]}
                    \times (\text{kernel\_size[1]} - 1) - 1}{\text{stride[1]}} + 1\right\rfloor

    Examples::

        >>> # pool of square window of size=3, stride=2
        >>> m = nn.MaxPool2d(3, stride=2)
        >>> # pool of non-square window
        >>> m = nn.MaxPool2d((3, 2), stride=(2, 1))
        >>> input = torch.randn(20, 16, 50, 32)
        >>> output = m(input)

    .. _link:
        https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md
    """

    @weak_script_method
    def forward(self, input):
        return F.max_pool2d(input, self.kernel_size, self.stride,
                            self.padding, self.dilation, self.ceil_mode,
                            self.return_indices)

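# A hedged sketch (editor's addition): ceil_mode swaps floor for ceil in the
# H_out/W_out formulas above, which can add one extra, partially-covered
# window per spatial dimension. The helper name is hypothetical.
def _maxpool2d_ceil_mode_sketch():
    import torch
    from torch import nn

    x = torch.randn(1, 1, 7, 7)
    assert nn.MaxPool2d(2, stride=2, ceil_mode=False)(x).shape == (1, 1, 3, 3)  # floor(3.5)
    assert nn.MaxPool2d(2, stride=2, ceil_mode=True)(x).shape == (1, 1, 4, 4)   # ceil(3.5)
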
@weak_module
class MaxPool3d(_MaxPoolNd):
    r"""Applies a 3D max pooling over an input signal composed of several input
    planes.

    In the simplest case, the output value of the layer with input size :math:`(N, C, D, H, W)`,
    output :math:`(N, C, D_{out}, H_{out}, W_{out})` and :attr:`kernel_size` :math:`(kD, kH, kW)`
    can be precisely described as:

    .. math::
        \begin{aligned}
            \text{out}(N_i, C_j, d, h, w) ={} & \max_{k=0, \ldots, kD-1} \max_{m=0, \ldots, kH-1} \max_{n=0, \ldots, kW-1} \\
                                              & \text{input}(N_i, C_j, \text{stride[0]} \times d + k,
                                                             \text{stride[1]} \times h + m, \text{stride[2]} \times w + n)
        \end{aligned}

    If :attr:`padding` is non-zero, then the input is implicitly zero-padded on both sides
    for :attr:`padding` number of points. :attr:`dilation` controls the spacing between the kernel points.
    It is harder to describe, but this `link`_ has a nice visualization of what :attr:`dilation` does.

    The parameters :attr:`kernel_size`, :attr:`stride`, :attr:`padding`, :attr:`dilation` can either be:

        - a single ``int`` -- in which case the same value is used for the depth, height and width dimension
        - a ``tuple`` of three ints -- in which case, the first `int` is used for the depth dimension,
          the second `int` for the height dimension and the third `int` for the width dimension

    Args:
        kernel_size: the size of the window to take a max over
        stride: the stride of the window. Default value is :attr:`kernel_size`
        padding: implicit zero padding to be added on all three sides
        dilation: a parameter that controls the stride of elements in the window
        return_indices: if ``True``, will return the max indices along with the outputs.
                        Useful for :class:`torch.nn.MaxUnpool3d` later
        ceil_mode: when True, will use `ceil` instead of `floor` to compute the output shape

    Shape:
        - Input: :math:`(N, C, D_{in}, H_{in}, W_{in})`
        - Output: :math:`(N, C, D_{out}, H_{out}, W_{out})`, where

          .. math::
              D_{out} = \left\lfloor\frac{D_{in} + 2 \times \text{padding}[0] - \text{dilation}[0] \times
                    (\text{kernel\_size}[0] - 1) - 1}{\text{stride}[0]} + 1\right\rfloor

          .. math::
              H_{out} = \left\lfloor\frac{H_{in} + 2 \times \text{padding}[1] - \text{dilation}[1] \times
                    (\text{kernel\_size}[1] - 1) - 1}{\text{stride}[1]} + 1\right\rfloor

          .. math::
              W_{out} = \left\lfloor\frac{W_{in} + 2 \times \text{padding}[2] - \text{dilation}[2] \times
                    (\text{kernel\_size}[2] - 1) - 1}{\text{stride}[2]} + 1\right\rfloor

    Examples::

        >>> # pool of square window of size=3, stride=2
        >>> m = nn.MaxPool3d(3, stride=2)
        >>> # pool of non-square window
        >>> m = nn.MaxPool3d((3, 2, 2), stride=(2, 1, 2))
        >>> input = torch.randn(20, 16, 50, 44, 31)
        >>> output = m(input)

    .. _link:
        https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md
    """  # noqa: E501

    @weak_script_method
    def forward(self, input):
        return F.max_pool3d(input, self.kernel_size, self.stride,
                            self.padding, self.dilation, self.ceil_mode,
                            self.return_indices)

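# A hedged sketch (editor's addition): with return_indices=True the module
# returns a (values, indices) pair; the int64 indices are what MaxUnpool3d
# later consumes. The helper name is hypothetical.
def _maxpool3d_return_indices_sketch():
    import torch
    from torch import nn

    m = nn.MaxPool3d(2, stride=2, return_indices=True)
    values, indices = m(torch.randn(1, 1, 8, 8, 8))
    assert values.shape == indices.shape == (1, 1, 4, 4, 4)
    assert indices.dtype == torch.int64
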
@weak_module
class _MaxUnpoolNd(Module):

    def extra_repr(self):
        return 'kernel_size={}, stride={}, padding={}'.format(
            self.kernel_size, self.stride, self.padding
        )

@weak_module
class MaxUnpool1d(_MaxUnpoolNd):
    r"""Computes a partial inverse of :class:`MaxPool1d`.

    :class:`MaxPool1d` is not fully invertible, since the non-maximal values are lost.

    :class:`MaxUnpool1d` takes in as input the output of :class:`MaxPool1d`
    including the indices of the maximal values and computes a partial inverse
    in which all non-maximal values are set to zero.

    .. note:: :class:`MaxPool1d` can map several input sizes to the same output
              sizes. Hence, the inversion process can get ambiguous.
              To accommodate this, you can provide the needed output size
              as an additional argument :attr:`output_size` in the forward call.
              See the Inputs and Example below.

    Args:
        kernel_size (int or tuple): Size of the max pooling window.
        stride (int or tuple): Stride of the max pooling window.
            It is set to :attr:`kernel_size` by default.
        padding (int or tuple): Padding that was added to the input

    Inputs:
        - `input`: the input Tensor to invert
        - `indices`: the indices given out by :class:`~torch.nn.MaxPool1d`
        - `output_size` (optional): the targeted output size

    Shape:
        - Input: :math:`(N, C, H_{in})`
        - Output: :math:`(N, C, H_{out})`, where

          .. math::
              H_{out} = (H_{in} - 1) \times \text{stride}[0] - 2 \times \text{padding}[0] + \text{kernel\_size}[0]

          or as given by :attr:`output_size` in the call operator

    Example::

        >>> pool = nn.MaxPool1d(2, stride=2, return_indices=True)
        >>> unpool = nn.MaxUnpool1d(2, stride=2)
        >>> input = torch.tensor([[[1., 2, 3, 4, 5, 6, 7, 8]]])
        >>> output, indices = pool(input)
        >>> unpool(output, indices)
        tensor([[[ 0.,  2.,  0.,  4.,  0.,  6.,  0.,  8.]]])

        >>> # Example showcasing the use of output_size
        >>> input = torch.tensor([[[1., 2, 3, 4, 5, 6, 7, 8, 9]]])
        >>> output, indices = pool(input)
        >>> unpool(output, indices, output_size=input.size())
        tensor([[[ 0.,  2.,  0.,  4.,  0.,  6.,  0.,  8.,  0.]]])

        >>> unpool(output, indices)
        tensor([[[ 0.,  2.,  0.,  4.,  0.,  6.,  0.,  8.]]])
    """

    def __init__(self, kernel_size, stride=None, padding=0):
        super(MaxUnpool1d, self).__init__()
        self.kernel_size = _single(kernel_size)
        self.stride = _single(stride or kernel_size)
        self.padding = _single(padding)

    def forward(self, input, indices, output_size=None):
        return F.max_unpool1d(input, indices, self.kernel_size, self.stride,
                              self.padding, output_size)

@weak_module
class MaxUnpool2d(_MaxUnpoolNd):
    r"""Computes a partial inverse of :class:`MaxPool2d`.

    :class:`MaxPool2d` is not fully invertible, since the non-maximal values are lost.

    :class:`MaxUnpool2d` takes in as input the output of :class:`MaxPool2d`
    including the indices of the maximal values and computes a partial inverse
    in which all non-maximal values are set to zero.

    .. note:: :class:`MaxPool2d` can map several input sizes to the same output
              sizes. Hence, the inversion process can get ambiguous.
              To accommodate this, you can provide the needed output size
              as an additional argument :attr:`output_size` in the forward call.
              See the Inputs and Example below.

    Args:
        kernel_size (int or tuple): Size of the max pooling window.
        stride (int or tuple): Stride of the max pooling window.
            It is set to :attr:`kernel_size` by default.
        padding (int or tuple): Padding that was added to the input

    Inputs:
        - `input`: the input Tensor to invert
        - `indices`: the indices given out by :class:`~torch.nn.MaxPool2d`
        - `output_size` (optional): the targeted output size

    Shape:
        - Input: :math:`(N, C, H_{in}, W_{in})`
        - Output: :math:`(N, C, H_{out}, W_{out})`, where

          .. math::
              H_{out} = (H_{in} - 1) \times \text{stride[0]} - 2 \times \text{padding[0]} + \text{kernel\_size[0]}

          .. math::
              W_{out} = (W_{in} - 1) \times \text{stride[1]} - 2 \times \text{padding[1]} + \text{kernel\_size[1]}

          or as given by :attr:`output_size` in the call operator

    Example::

        >>> pool = nn.MaxPool2d(2, stride=2, return_indices=True)
        >>> unpool = nn.MaxUnpool2d(2, stride=2)
        >>> input = torch.tensor([[[[ 1.,  2,  3,  4],
                                    [ 5,  6,  7,  8],
                                    [ 9, 10, 11, 12],
                                    [13, 14, 15, 16]]]])
        >>> output, indices = pool(input)
        >>> unpool(output, indices)
        tensor([[[[  0.,   0.,   0.,   0.],
                  [  0.,   6.,   0.,   8.],
                  [  0.,   0.,   0.,   0.],
                  [  0.,  14.,   0.,  16.]]]])

        >>> # specify a different output size than input size
        >>> unpool(output, indices, output_size=torch.Size([1, 1, 5, 5]))
        tensor([[[[  0.,   0.,   0.,   0.,   0.],
                  [  6.,   0.,   8.,   0.,   0.],
                  [  0.,   0.,   0.,  14.,   0.],
                  [ 16.,   0.,   0.,   0.,   0.],
                  [  0.,   0.,   0.,   0.,   0.]]]])
    """

    def __init__(self, kernel_size, stride=None, padding=0):
        super(MaxUnpool2d, self).__init__()
        self.kernel_size = _pair(kernel_size)
        self.stride = _pair(stride or kernel_size)
        self.padding = _pair(padding)

    def forward(self, input, indices, output_size=None):
        return F.max_unpool2d(input, indices, self.kernel_size, self.stride,
                              self.padding, output_size)

@weak_module
class MaxUnpool3d(_MaxUnpoolNd):
    r"""Computes a partial inverse of :class:`MaxPool3d`.

    :class:`MaxPool3d` is not fully invertible, since the non-maximal values are lost.
    :class:`MaxUnpool3d` takes in as input the output of :class:`MaxPool3d`
    including the indices of the maximal values and computes a partial inverse
    in which all non-maximal values are set to zero.

    .. note:: :class:`MaxPool3d` can map several input sizes to the same output
              sizes. Hence, the inversion process can get ambiguous.
              To accommodate this, you can provide the needed output size
              as an additional argument :attr:`output_size` in the forward call.
              See the Inputs section below.

    Args:
        kernel_size (int or tuple): Size of the max pooling window.
        stride (int or tuple): Stride of the max pooling window.
            It is set to :attr:`kernel_size` by default.
        padding (int or tuple): Padding that was added to the input

    Inputs:
        - `input`: the input Tensor to invert
        - `indices`: the indices given out by :class:`~torch.nn.MaxPool3d`
        - `output_size` (optional): the targeted output size

    Shape:
        - Input: :math:`(N, C, D_{in}, H_{in}, W_{in})`
        - Output: :math:`(N, C, D_{out}, H_{out}, W_{out})`, where

          .. math::
              D_{out} = (D_{in} - 1) \times \text{stride[0]} - 2 \times \text{padding[0]} + \text{kernel\_size[0]}

          .. math::
              H_{out} = (H_{in} - 1) \times \text{stride[1]} - 2 \times \text{padding[1]} + \text{kernel\_size[1]}

          .. math::
              W_{out} = (W_{in} - 1) \times \text{stride[2]} - 2 \times \text{padding[2]} + \text{kernel\_size[2]}

          or as given by :attr:`output_size` in the call operator

    Example::

        >>> # pool of square window of size=3, stride=2
        >>> pool = nn.MaxPool3d(3, stride=2, return_indices=True)
        >>> unpool = nn.MaxUnpool3d(3, stride=2)
        >>> output, indices = pool(torch.randn(20, 16, 51, 33, 15))
        >>> unpooled_output = unpool(output, indices)
        >>> unpooled_output.size()
        torch.Size([20, 16, 51, 33, 15])
    """

    def __init__(self, kernel_size, stride=None, padding=0):
        super(MaxUnpool3d, self).__init__()
        self.kernel_size = _triple(kernel_size)
        self.stride = _triple(stride or kernel_size)
        self.padding = _triple(padding)

    def forward(self, input, indices, output_size=None):
        return F.max_unpool3d(input, indices, self.kernel_size, self.stride,
                              self.padding, output_size)

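# A hedged sketch (editor's addition): numerically checks the inversion
# formulas above, size_out = (size_in - 1) * stride - 2 * padding + kernel_size,
# by round-tripping the docstring's pooling example. The helper name is
# hypothetical.
def _maxunpool3d_shape_sketch():
    import torch
    from torch import nn

    pool = nn.MaxPool3d(3, stride=2, return_indices=True)
    unpool = nn.MaxUnpool3d(3, stride=2)
    out, indices = pool(torch.randn(1, 1, 51, 33, 15))
    assert out.shape == (1, 1, 25, 16, 7)
    # e.g. depth: (25 - 1) * 2 - 2 * 0 + 3 = 51
    assert unpool(out, indices).shape == (1, 1, 51, 33, 15)
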
@weak_module
class _AvgPoolNd(Module):
    __constants__ = ['kernel_size', 'stride', 'padding', 'ceil_mode', 'count_include_pad']

    def extra_repr(self):
        return 'kernel_size={}, stride={}, padding={}'.format(
            self.kernel_size, self.stride, self.padding
        )

@weak_module
class AvgPool1d(_AvgPoolNd):
    r"""Applies a 1D average pooling over an input signal composed of several
    input planes.

    In the simplest case, the output value of the layer with input size :math:`(N, C, L)`,
    output :math:`(N, C, L_{out})` and :attr:`kernel_size` :math:`k`
    can be precisely described as:

    .. math::

        \text{out}(N_i, C_j, l) = \frac{1}{k} \sum_{m=0}^{k-1}
                               \text{input}(N_i, C_j, \text{stride} \times l + m)

    If :attr:`padding` is non-zero, then the input is implicitly zero-padded on both sides
    for :attr:`padding` number of points.

    The parameters :attr:`kernel_size`, :attr:`stride`, :attr:`padding` can each be
    an ``int`` or a one-element tuple.

    Args:
        kernel_size: the size of the window
        stride: the stride of the window. Default value is :attr:`kernel_size`
        padding: implicit zero padding to be added on both sides
        ceil_mode: when True, will use `ceil` instead of `floor` to compute the output shape
        count_include_pad: when True, will include the zero-padding in the averaging calculation

    Shape:
        - Input: :math:`(N, C, L_{in})`
        - Output: :math:`(N, C, L_{out})`, where

          .. math::
              L_{out} = \left\lfloor \frac{L_{in} +
                    2 \times \text{padding} - \text{kernel\_size}}{\text{stride}} + 1\right\rfloor

    Examples::

        >>> # pool with window of size=3, stride=2
        >>> m = nn.AvgPool1d(3, stride=2)
        >>> m(torch.tensor([[[1., 2, 3, 4, 5, 6, 7]]]))
        tensor([[[ 2.,  4.,  6.]]])
    """

    def __init__(self, kernel_size, stride=None, padding=0, ceil_mode=False,
                 count_include_pad=True):
        super(AvgPool1d, self).__init__()
        self.kernel_size = _single(kernel_size)
        self.stride = _single(stride if stride is not None else kernel_size)
        self.padding = _single(padding)
        self.ceil_mode = ceil_mode
        self.count_include_pad = count_include_pad

    @weak_script_method
    def forward(self, input):
        return F.avg_pool1d(
            input, self.kernel_size, self.stride, self.padding, self.ceil_mode,
            self.count_include_pad)

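# A hedged sketch (editor's addition): with zero padding in play,
# count_include_pad decides whether a window that overlaps the padding is
# averaged over the full kernel size or only over the real input elements.
# The helper name is hypothetical.
def _avgpool1d_count_include_pad_sketch():
    import torch
    from torch import nn

    x = torch.ones(1, 1, 4)
    incl = nn.AvgPool1d(3, stride=1, padding=1, count_include_pad=True)
    excl = nn.AvgPool1d(3, stride=1, padding=1, count_include_pad=False)
    # The first window sees [pad, 1, 1]: sum 2, divided by 3 vs. by 2.
    assert torch.allclose(incl(x)[0, 0, 0], torch.tensor(2. / 3))
    assert torch.allclose(excl(x)[0, 0, 0], torch.tensor(1.))
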
@weak_module
class AvgPool2d(_AvgPoolNd):
    r"""Applies a 2D average pooling over an input signal composed of several input
    planes.

    In the simplest case, the output value of the layer with input size :math:`(N, C, H, W)`,
    output :math:`(N, C, H_{out}, W_{out})` and :attr:`kernel_size` :math:`(kH, kW)`
    can be precisely described as:

    .. math::

        out(N_i, C_j, h, w) = \frac{1}{kH \times kW} \sum_{m=0}^{kH-1} \sum_{n=0}^{kW-1}
                              input(N_i, C_j, stride[0] \times h + m, stride[1] \times w + n)

    If :attr:`padding` is non-zero, then the input is implicitly zero-padded on both sides
    for :attr:`padding` number of points.

    The parameters :attr:`kernel_size`, :attr:`stride`, :attr:`padding` can either be:

        - a single ``int`` -- in which case the same value is used for the height and width dimension
        - a ``tuple`` of two ints -- in which case, the first `int` is used for the height dimension,
          and the second `int` for the width dimension

    Args:
        kernel_size: the size of the window
        stride: the stride of the window. Default value is :attr:`kernel_size`
        padding: implicit zero padding to be added on both sides
        ceil_mode: when True, will use `ceil` instead of `floor` to compute the output shape
        count_include_pad: when True, will include the zero-padding in the averaging calculation

    Shape:
        - Input: :math:`(N, C, H_{in}, W_{in})`
        - Output: :math:`(N, C, H_{out}, W_{out})`, where

          .. math::
              H_{out} = \left\lfloor\frac{H_{in} + 2 \times \text{padding}[0] -
                    \text{kernel\_size}[0]}{\text{stride}[0]} + 1\right\rfloor

          .. math::
              W_{out} = \left\lfloor\frac{W_{in} + 2 \times \text{padding}[1] -
                    \text{kernel\_size}[1]}{\text{stride}[1]} + 1\right\rfloor

    Examples::

        >>> # pool of square window of size=3, stride=2
        >>> m = nn.AvgPool2d(3, stride=2)
        >>> # pool of non-square window
        >>> m = nn.AvgPool2d((3, 2), stride=(2, 1))
        >>> input = torch.randn(20, 16, 50, 32)
        >>> output = m(input)
    """

    def __init__(self, kernel_size, stride=None, padding=0, ceil_mode=False,
                 count_include_pad=True):
        super(AvgPool2d, self).__init__()
        self.kernel_size = kernel_size
        self.stride = stride or kernel_size
        self.padding = padding
        self.ceil_mode = ceil_mode
        self.count_include_pad = count_include_pad

    @weak_script_method
    def forward(self, input):
        return F.avg_pool2d(input, self.kernel_size, self.stride,
                            self.padding, self.ceil_mode, self.count_include_pad)

@weak_module
class AvgPool3d(_AvgPoolNd):
    r"""Applies a 3D average pooling over an input signal composed of several input
    planes.

    In the simplest case, the output value of the layer with input size :math:`(N, C, D, H, W)`,
    output :math:`(N, C, D_{out}, H_{out}, W_{out})` and :attr:`kernel_size` :math:`(kD, kH, kW)`
    can be precisely described as:

    .. math::
        \begin{aligned}
            \text{out}(N_i, C_j, d, h, w) ={} & \sum_{k=0}^{kD-1} \sum_{m=0}^{kH-1} \sum_{n=0}^{kW-1} \\
                                              & \frac{\text{input}(N_i, C_j, \text{stride}[0] \times d + k,
                                                      \text{stride}[1] \times h + m, \text{stride}[2] \times w + n)}
                                                     {kD \times kH \times kW}
        \end{aligned}

    If :attr:`padding` is non-zero, then the input is implicitly zero-padded on all three sides
    for :attr:`padding` number of points.

    The parameters :attr:`kernel_size`, :attr:`stride` can either be:

        - a single ``int`` -- in which case the same value is used for the depth, height and width dimension
        - a ``tuple`` of three ints -- in which case, the first `int` is used for the depth dimension,
          the second `int` for the height dimension and the third `int` for the width dimension

    Args:
        kernel_size: the size of the window
        stride: the stride of the window. Default value is :attr:`kernel_size`
        padding: implicit zero padding to be added on all three sides
        ceil_mode: when True, will use `ceil` instead of `floor` to compute the output shape
        count_include_pad: when True, will include the zero-padding in the averaging calculation

    Shape:
        - Input: :math:`(N, C, D_{in}, H_{in}, W_{in})`
        - Output: :math:`(N, C, D_{out}, H_{out}, W_{out})`, where

          .. math::
              D_{out} = \left\lfloor\frac{D_{in} + 2 \times \text{padding}[0] -
                    \text{kernel\_size}[0]}{\text{stride}[0]} + 1\right\rfloor

          .. math::
              H_{out} = \left\lfloor\frac{H_{in} + 2 \times \text{padding}[1] -
                    \text{kernel\_size}[1]}{\text{stride}[1]} + 1\right\rfloor

          .. math::
              W_{out} = \left\lfloor\frac{W_{in} + 2 \times \text{padding}[2] -
                    \text{kernel\_size}[2]}{\text{stride}[2]} + 1\right\rfloor

    Examples::

        >>> # pool of square window of size=3, stride=2
        >>> m = nn.AvgPool3d(3, stride=2)
        >>> # pool of non-square window
        >>> m = nn.AvgPool3d((3, 2, 2), stride=(2, 1, 2))
        >>> input = torch.randn(20, 16, 50, 44, 31)
        >>> output = m(input)
    """

    def __init__(self, kernel_size, stride=None, padding=0, ceil_mode=False,
                 count_include_pad=True):
        super(AvgPool3d, self).__init__()
        self.kernel_size = kernel_size
        self.stride = stride or kernel_size
        self.padding = padding
        self.ceil_mode = ceil_mode
        self.count_include_pad = count_include_pad

    @weak_script_method
    def forward(self, input):
        return F.avg_pool3d(input, self.kernel_size, self.stride,
                            self.padding, self.ceil_mode, self.count_include_pad)

    def __setstate__(self, d):
        super(AvgPool3d, self).__setstate__(d)
        # Restore defaults for attributes missing from modules serialized
        # before these options existed.
        self.__dict__.setdefault('padding', 0)
        self.__dict__.setdefault('ceil_mode', False)
        self.__dict__.setdefault('count_include_pad', True)

@weak_module
class FractionalMaxPool2d(Module):
    r"""Applies a 2D fractional max pooling over an input signal composed of several input planes.

    Fractional MaxPooling is described in detail in the paper `Fractional MaxPooling`_ by Ben Graham.

    The max-pooling operation is applied in :math:`kH \times kW` regions by a stochastic
    step size determined by the target output size.
    The number of output features is equal to the number of input planes.

    Args:
        kernel_size: the size of the window to take a max over.
                     Can be a single number k (for a square kernel of k x k) or a tuple `(kh, kw)`
        output_size: the target output size of the image of the form `oH x oW`.
                     Can be a tuple `(oH, oW)` or a single number oH for a square image `oH x oH`
        output_ratio: If one wants to have an output size as a ratio of the input size, this option can be given.
                      This has to be a number or tuple in the range (0, 1)
        return_indices: if ``True``, will return the indices along with the outputs.
                        Useful to pass to :meth:`nn.MaxUnpool2d`. Default: ``False``

    Examples:
        >>> # pool of square window of size=3, and target output size 13x12
        >>> m = nn.FractionalMaxPool2d(3, output_size=(13, 12))
        >>> # pool of square window and target output size being half of input image size
        >>> m = nn.FractionalMaxPool2d(3, output_ratio=(0.5, 0.5))
        >>> input = torch.randn(20, 16, 50, 32)
        >>> output = m(input)

    .. _Fractional MaxPooling:
        http://arxiv.org/abs/1412.6071
    """
    __constants__ = ['kernel_size', 'return_indices', 'output_size',
                     'output_ratio']

    def __init__(self, kernel_size, output_size=None, output_ratio=None,
                 return_indices=False, _random_samples=None):
        super(FractionalMaxPool2d, self).__init__()
        self.kernel_size = _pair(kernel_size)
        self.return_indices = return_indices
        self.register_buffer('_random_samples', _random_samples)
        self.output_size = _pair(output_size) if output_size is not None else None
        self.output_ratio = _pair(output_ratio) if output_ratio is not None else None
        if output_size is None and output_ratio is None:
            raise ValueError("FractionalMaxPool2d requires specifying either "
                             "an output size, or a pooling ratio")
        if output_size is not None and output_ratio is not None:
            raise ValueError("only one of output_size and output_ratio may be specified")
        if self.output_ratio is not None:
            if not (0 < self.output_ratio[0] < 1 and 0 < self.output_ratio[1] < 1):
                raise ValueError("output_ratio must be between 0 and 1 (got {})"
                                 .format(output_ratio))

    @weak_script_method
    def forward(self, input):
        return F.fractional_max_pool2d(
            input, self.kernel_size, self.output_size, self.output_ratio,
            self.return_indices,
            _random_samples=self._random_samples)

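# A hedged sketch (editor's addition): with output_ratio, the target size is
# derived from the input size at call time, so a 0.5 ratio on a 50x32 input
# should yield a 25x16 output. The helper name is hypothetical.
def _fractional_maxpool2d_ratio_sketch():
    import torch
    from torch import nn

    m = nn.FractionalMaxPool2d(3, output_ratio=(0.5, 0.5))
    out = m(torch.randn(1, 16, 50, 32))
    assert out.shape == (1, 16, 25, 16)
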
@weak_module
class FractionalMaxPool3d(Module):
    r"""Applies a 3D fractional max pooling over an input signal composed of several input planes.

    Fractional MaxPooling is described in detail in the paper `Fractional MaxPooling`_ by Ben Graham.

    The max-pooling operation is applied in :math:`kT \times kH \times kW` regions by a stochastic
    step size determined by the target output size.
    The number of output features is equal to the number of input planes.

    Args:
        kernel_size: the size of the window to take a max over.
                     Can be a single number k (for a cubic kernel of k x k x k) or a tuple `(kT, kH, kW)`
        output_size: the target output size of the image of the form `oT x oH x oW`.
                     Can be a tuple `(oT, oH, oW)` or a single number oH for a cubic image `oH x oH x oH`
        output_ratio: If one wants to have an output size as a ratio of the input size, this option can be given.
                      This has to be a number or tuple in the range (0, 1)
        return_indices: if ``True``, will return the indices along with the outputs.
                        Useful to pass to :meth:`nn.MaxUnpool3d`. Default: ``False``

    Examples:
        >>> # pool of cubic window of size=3, and target output size 13x12x11
        >>> m = nn.FractionalMaxPool3d(3, output_size=(13, 12, 11))
        >>> # pool of cubic window and target output size being half of input size
        >>> m = nn.FractionalMaxPool3d(3, output_ratio=(0.5, 0.5, 0.5))
        >>> input = torch.randn(20, 16, 50, 32, 16)
        >>> output = m(input)

    .. _Fractional MaxPooling:
        http://arxiv.org/abs/1412.6071
    """
    __constants__ = ['kernel_size', 'return_indices', 'output_size',
                     'output_ratio']

    def __init__(self, kernel_size, output_size=None, output_ratio=None,
                 return_indices=False, _random_samples=None):
        super(FractionalMaxPool3d, self).__init__()
        self.kernel_size = _triple(kernel_size)
        self.return_indices = return_indices
        self.register_buffer('_random_samples', _random_samples)
        self.output_size = _triple(output_size) if output_size is not None else None
        self.output_ratio = _triple(output_ratio) if output_ratio is not None else None
        if output_size is None and output_ratio is None:
            raise ValueError("FractionalMaxPool3d requires specifying either "
                             "an output size, or a pooling ratio")
        if output_size is not None and output_ratio is not None:
            raise ValueError("only one of output_size and output_ratio may be specified")
        if self.output_ratio is not None:
            if not (0 < self.output_ratio[0] < 1 and
                    0 < self.output_ratio[1] < 1 and
                    0 < self.output_ratio[2] < 1):
                raise ValueError("output_ratio must be between 0 and 1 (got {})"
                                 .format(output_ratio))

    @weak_script_method
    def forward(self, input):
        return F.fractional_max_pool3d(
            input, self.kernel_size, self.output_size, self.output_ratio,
            self.return_indices,
            _random_samples=self._random_samples)

@weak_module
class _LPPoolNd(Module):
    __constants__ = ['norm_type', 'kernel_size', 'stride', 'ceil_mode']

    def __init__(self, norm_type, kernel_size, stride=None, ceil_mode=False):
        super(_LPPoolNd, self).__init__()
        self.norm_type = norm_type
        self.kernel_size = kernel_size
        self.stride = stride
        self.ceil_mode = ceil_mode

    def extra_repr(self):
        return 'norm_type={norm_type}, kernel_size={kernel_size}, stride={stride}, ' \
            'ceil_mode={ceil_mode}'.format(**self.__dict__)

@weak_module
class LPPool1d(_LPPoolNd):
    r"""Applies a 1D power-average pooling over an input signal composed of several input
    planes.

    On each window, the function computed is:

    .. math::
        f(X) = \sqrt[p]{\sum_{x \in X} x^{p}}

    - At :math:`p = \infty`, one gets Max Pooling
    - At :math:`p = 1`, one gets Sum Pooling (which is proportional to Average Pooling)

    .. note:: If the sum to the power of `p` is zero, the gradient of this function is
              not defined. This implementation will set the gradient to zero in this case.

    Args:
        kernel_size: a single int, the size of the window
        stride: a single int, the stride of the window. Default value is :attr:`kernel_size`
        ceil_mode: when True, will use `ceil` instead of `floor` to compute the output shape

    Shape:
        - Input: :math:`(N, C, L_{in})`
        - Output: :math:`(N, C, L_{out})`, where

          .. math::
              L_{out} = \left\lfloor\frac{L_{in} +
                    2 \times \text{padding} - \text{kernel\_size}}{\text{stride}} + 1\right\rfloor

    Examples::

        >>> # power-2 pool of window of length 3, with stride 2
        >>> m = nn.LPPool1d(2, 3, stride=2)
        >>> input = torch.randn(20, 16, 50)
        >>> output = m(input)
    """

    @weak_script_method
    def forward(self, input):
        return F.lp_pool1d(input, float(self.norm_type), self.kernel_size,
                           self.stride, self.ceil_mode)

@weak_module
class LPPool2d(_LPPoolNd):
    r"""Applies a 2D power-average pooling over an input signal composed of several input
    planes.

    On each window, the function computed is:

    .. math::
        f(X) = \sqrt[p]{\sum_{x \in X} x^{p}}

    - At :math:`p = \infty`, one gets Max Pooling
    - At :math:`p = 1`, one gets Sum Pooling (which is proportional to average pooling)

    The parameters :attr:`kernel_size`, :attr:`stride` can either be:

        - a single ``int`` -- in which case the same value is used for the height and width dimension
        - a ``tuple`` of two ints -- in which case, the first `int` is used for the height dimension,
          and the second `int` for the width dimension

    .. note:: If the sum to the power of `p` is zero, the gradient of this function is
              not defined. This implementation will set the gradient to zero in this case.

    Args:
        kernel_size: the size of the window
        stride: the stride of the window. Default value is :attr:`kernel_size`
        ceil_mode: when True, will use `ceil` instead of `floor` to compute the output shape

    Shape:
        - Input: :math:`(N, C, H_{in}, W_{in})`
        - Output: :math:`(N, C, H_{out}, W_{out})`, where

          .. math::
              H_{out} = \left\lfloor\frac{H_{in} + 2 \times \text{padding}[0] - \text{dilation}[0] \times
                    (\text{kernel\_size}[0] - 1) - 1}{\text{stride}[0]} + 1\right\rfloor

          .. math::
              W_{out} = \left\lfloor\frac{W_{in} + 2 \times \text{padding}[1] - \text{dilation}[1] \times
                    (\text{kernel\_size}[1] - 1) - 1}{\text{stride}[1]} + 1\right\rfloor

    Examples::

        >>> # power-2 pool of square window of size=3, stride=2
        >>> m = nn.LPPool2d(2, 3, stride=2)
        >>> # pool of non-square window of power 1.2
        >>> m = nn.LPPool2d(1.2, (3, 2), stride=(2, 1))
        >>> input = torch.randn(20, 16, 50, 32)
        >>> output = m(input)

    """

    @weak_script_method
    def forward(self, input):
        return F.lp_pool2d(input, float(self.norm_type), self.kernel_size,
                           self.stride, self.ceil_mode)

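# A hedged sketch (editor's addition): at norm_type=1 (on non-negative
# inputs) the power-average pool reduces to sum pooling, i.e. the kernel
# area times the average pool. The helper name is hypothetical.
def _lppool2d_norm1_sketch():
    import torch
    from torch import nn

    x = torch.rand(1, 1, 6, 6)  # non-negative values
    lp = nn.LPPool2d(norm_type=1, kernel_size=2, stride=2)
    avg = nn.AvgPool2d(kernel_size=2, stride=2)
    assert torch.allclose(lp(x), 4 * avg(x))
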
@weak_module
class _AdaptiveMaxPoolNd(Module):
    __constants__ = ['output_size', 'return_indices']

    def __init__(self, output_size, return_indices=False):
        super(_AdaptiveMaxPoolNd, self).__init__()
        self.output_size = output_size
        self.return_indices = return_indices

    def extra_repr(self):
        return 'output_size={}'.format(self.output_size)

# FIXME (by @ssnl): Improve adaptive pooling docs: specify what the input and
# output shapes are, and how the operation computes output.

@weak_module
class AdaptiveMaxPool1d(_AdaptiveMaxPoolNd):
    r"""Applies a 1D adaptive max pooling over an input signal composed of several input planes.

    The output size is H, for any input size.
    The number of output features is equal to the number of input planes.

    Args:
        output_size: the target output size H
        return_indices: if ``True``, will return the indices along with the outputs.
                        Useful to pass to nn.MaxUnpool1d. Default: ``False``

    Examples:
        >>> # target output size of 5
        >>> m = nn.AdaptiveMaxPool1d(5)
        >>> input = torch.randn(1, 64, 8)
        >>> output = m(input)

    """

    @weak_script_method
    def forward(self, input):
        return F.adaptive_max_pool1d(input, self.output_size, self.return_indices)

@weak_module
class AdaptiveMaxPool2d(_AdaptiveMaxPoolNd):
    r"""Applies a 2D adaptive max pooling over an input signal composed of several input planes.

    The output is of size H x W, for any input size.
    The number of output features is equal to the number of input planes.

    Args:
        output_size: the target output size of the image of the form H x W.
                     Can be a tuple (H, W) or a single H for a square image H x H.
                     H and W can be either an ``int``, or ``None`` which means the size will
                     be the same as that of the input.
        return_indices: if ``True``, will return the indices along with the outputs.
                        Useful to pass to nn.MaxUnpool2d. Default: ``False``

    Examples:
        >>> # target output size of 5x7
        >>> m = nn.AdaptiveMaxPool2d((5,7))
        >>> input = torch.randn(1, 64, 8, 9)
        >>> output = m(input)
        >>> # target output size of 7x7 (square)
        >>> m = nn.AdaptiveMaxPool2d(7)
        >>> input = torch.randn(1, 64, 10, 9)
        >>> output = m(input)
        >>> # target output size of 10x7
        >>> m = nn.AdaptiveMaxPool2d((None, 7))
        >>> input = torch.randn(1, 64, 10, 9)
        >>> output = m(input)

    """

    @weak_script_method
    def forward(self, input):
        return F.adaptive_max_pool2d(input, self.output_size, self.return_indices)

@weak_module
class AdaptiveMaxPool3d(_AdaptiveMaxPoolNd):
    r"""Applies a 3D adaptive max pooling over an input signal composed of several input planes.

    The output is of size D x H x W, for any input size.
    The number of output features is equal to the number of input planes.

    Args:
        output_size: the target output size of the image of the form D x H x W.
                     Can be a tuple (D, H, W) or a single D for a cube D x D x D.
                     D, H and W can be either an ``int``, or ``None`` which means the size will
                     be the same as that of the input.
        return_indices: if ``True``, will return the indices along with the outputs.
                        Useful to pass to nn.MaxUnpool3d. Default: ``False``

    Examples:
        >>> # target output size of 5x7x9
        >>> m = nn.AdaptiveMaxPool3d((5,7,9))
        >>> input = torch.randn(1, 64, 8, 9, 10)
        >>> output = m(input)
        >>> # target output size of 7x7x7 (cube)
        >>> m = nn.AdaptiveMaxPool3d(7)
        >>> input = torch.randn(1, 64, 10, 9, 8)
        >>> output = m(input)
        >>> # target output size of 7x9x8
        >>> m = nn.AdaptiveMaxPool3d((7, None, None))
        >>> input = torch.randn(1, 64, 10, 9, 8)
        >>> output = m(input)

    """

    @weak_script_method
    def forward(self, input):
        return F.adaptive_max_pool3d(input, self.output_size, self.return_indices)

@weak_module
class _AdaptiveAvgPoolNd(Module):
    __constants__ = ['output_size']

    def __init__(self, output_size):
        super(_AdaptiveAvgPoolNd, self).__init__()
        self.output_size = output_size

    def extra_repr(self):
        return 'output_size={}'.format(self.output_size)

@weak_module
class AdaptiveAvgPool1d(_AdaptiveAvgPoolNd):
    r"""Applies a 1D adaptive average pooling over an input signal composed of several input planes.

    The output size is H, for any input size.
    The number of output features is equal to the number of input planes.

    Args:
        output_size: the target output size H

    Examples:
        >>> # target output size of 5
        >>> m = nn.AdaptiveAvgPool1d(5)
        >>> input = torch.randn(1, 64, 8)
        >>> output = m(input)

    """

    @weak_script_method
    def forward(self, input):
        return F.adaptive_avg_pool1d(input, self.output_size)

@weak_module
class AdaptiveAvgPool2d(_AdaptiveAvgPoolNd):
    r"""Applies a 2D adaptive average pooling over an input signal composed of several input planes.

    The output is of size H x W, for any input size.
    The number of output features is equal to the number of input planes.

    Args:
        output_size: the target output size of the image of the form H x W.
                     Can be a tuple (H, W) or a single H for a square image H x H.
                     H and W can be either an ``int``, or ``None`` which means the size will
                     be the same as that of the input.

    Examples:
        >>> # target output size of 5x7
        >>> m = nn.AdaptiveAvgPool2d((5,7))
        >>> input = torch.randn(1, 64, 8, 9)
        >>> output = m(input)
        >>> # target output size of 7x7 (square)
        >>> m = nn.AdaptiveAvgPool2d(7)
        >>> input = torch.randn(1, 64, 10, 9)
        >>> output = m(input)
        >>> # target output size of 10x7
        >>> m = nn.AdaptiveAvgPool2d((None, 7))
        >>> input = torch.randn(1, 64, 10, 9)
        >>> output = m(input)

    """

    @weak_script_method
    def forward(self, input):
        return F.adaptive_avg_pool2d(input, self.output_size)

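# A hedged sketch (editor's addition): AdaptiveAvgPool2d((1, 1)) is global
# average pooling, i.e. the mean over both spatial dimensions. The helper
# name is hypothetical.
def _adaptive_avgpool2d_global_sketch():
    import torch
    from torch import nn

    x = torch.randn(2, 8, 5, 7)
    m = nn.AdaptiveAvgPool2d((1, 1))
    assert torch.allclose(m(x), x.mean(3, keepdim=True).mean(2, keepdim=True))
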
@weak_module
class AdaptiveAvgPool3d(_AdaptiveAvgPoolNd):
    r"""Applies a 3D adaptive average pooling over an input signal composed of several input planes.

    The output is of size D x H x W, for any input size.
    The number of output features is equal to the number of input planes.

    Args:
        output_size: the target output size of the form D x H x W.
                     Can be a tuple (D, H, W) or a single number D for a cube D x D x D.
                     D, H and W can be either an ``int``, or ``None`` which means the size will
                     be the same as that of the input.

    Examples:
        >>> # target output size of 5x7x9
        >>> m = nn.AdaptiveAvgPool3d((5,7,9))
        >>> input = torch.randn(1, 64, 8, 9, 10)
        >>> output = m(input)
        >>> # target output size of 7x7x7 (cube)
        >>> m = nn.AdaptiveAvgPool3d(7)
        >>> input = torch.randn(1, 64, 10, 9, 8)
        >>> output = m(input)
        >>> # target output size of 7x9x8
        >>> m = nn.AdaptiveAvgPool3d((7, None, None))
        >>> input = torch.randn(1, 64, 10, 9, 8)
        >>> output = m(input)

    """

    @weak_script_method
    def forward(self, input):
        return F.adaptive_avg_pool3d(input, self.output_size)