Caffe2 - Python API
A deep learning, cross-platform ML framework
upsampling.py
1 from numbers import Integral
2 import warnings
3 
4 from .module import Module
5 from .. import functional as F
6 from ..._jit_internal import weak_module, weak_script_method
7 
8 
@weak_module
class Upsample(Module):
    r"""Upsamples a given multi-channel 1D (temporal), 2D (spatial) or 3D (volumetric) data.

    The input data is assumed to be of the form
    `minibatch x channels x [optional depth] x [optional height] x width`.
    Hence, for temporal inputs we expect a 3D Tensor, for spatial inputs a 4D
    Tensor, and for volumetric inputs a 5D Tensor.

    The algorithms available for upsampling are nearest neighbor (for any of
    these inputs) and linear, bilinear / bicubic, and trilinear for 3D, 4D
    and 5D input Tensors, respectively.

    One can either give a :attr:`scale_factor` or the target output :attr:`size` to
    calculate the output size. (You cannot give both, as it is ambiguous)

    Args:
        size (int or Tuple[int] or Tuple[int, int] or Tuple[int, int, int],
            optional): output spatial sizes
        scale_factor (float or Tuple[float] or Tuple[float, float] or
            Tuple[float, float, float], optional): multiplier for spatial size.
            Has to match input size if it is a tuple. A scalar is stored as a
            ``float``; a tuple is stored as a tuple of ``float``.
        mode (str, optional): the upsampling algorithm: one of ``'nearest'``,
            ``'linear'``, ``'bilinear'``, ``'bicubic'`` and ``'trilinear'``.
            Default: ``'nearest'``
        align_corners (bool, optional): if ``True``, the corner pixels of the input
            and output tensors are aligned, and thus preserving the values at
            those pixels. This only has effect when :attr:`mode` is
            ``'linear'``, ``'bilinear'``, ``'bicubic'``, or ``'trilinear'``.
            Default: ``False``

    Shape:
        - Input: :math:`(N, C, W_{in})`, :math:`(N, C, H_{in}, W_{in})` or :math:`(N, C, D_{in}, H_{in}, W_{in})`
        - Output: :math:`(N, C, W_{out})`, :math:`(N, C, H_{out}, W_{out})`
          or :math:`(N, C, D_{out}, H_{out}, W_{out})`, where

    .. math::
        D_{out} = \left\lfloor D_{in} \times \text{scale\_factor} \right\rfloor

    .. math::
        H_{out} = \left\lfloor H_{in} \times \text{scale\_factor} \right\rfloor

    .. math::
        W_{out} = \left\lfloor W_{in} \times \text{scale\_factor} \right\rfloor

    .. warning::
        With ``align_corners = True``, the linearly interpolating modes
        (`linear`, `bilinear`, `bicubic`, and `trilinear`) don't proportionally
        align the output and input pixels, and thus the output values can depend
        on the input size. This was the default behavior for these modes up to
        version 0.3.1. Since then, the default behavior is
        ``align_corners = False``. See below for concrete examples on how this
        affects the outputs.

    .. note::
        If you want downsampling/general resizing, you should use :func:`~nn.functional.interpolate`.

    Examples::

        >>> input = torch.arange(1, 5, dtype=torch.float32).view(1, 1, 2, 2)
        >>> input
        tensor([[[[ 1.,  2.],
                  [ 3.,  4.]]]])

        >>> m = nn.Upsample(scale_factor=2, mode='nearest')
        >>> m(input)
        tensor([[[[ 1.,  1.,  2.,  2.],
                  [ 1.,  1.,  2.,  2.],
                  [ 3.,  3.,  4.,  4.],
                  [ 3.,  3.,  4.,  4.]]]])

        >>> m = nn.Upsample(scale_factor=2, mode='bilinear')  # align_corners=False
        >>> m(input)
        tensor([[[[ 1.0000,  1.2500,  1.7500,  2.0000],
                  [ 1.5000,  1.7500,  2.2500,  2.5000],
                  [ 2.5000,  2.7500,  3.2500,  3.5000],
                  [ 3.0000,  3.2500,  3.7500,  4.0000]]]])

        >>> m = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
        >>> m(input)
        tensor([[[[ 1.0000,  1.3333,  1.6667,  2.0000],
                  [ 1.6667,  2.0000,  2.3333,  2.6667],
                  [ 2.3333,  2.6667,  3.0000,  3.3333],
                  [ 3.0000,  3.3333,  3.6667,  4.0000]]]])

        >>> # Try scaling the same data in a larger tensor
        >>>
        >>> input_3x3 = torch.zeros(3, 3).view(1, 1, 3, 3)
        >>> input_3x3[:, :, :2, :2].copy_(input)
        tensor([[[[ 1.,  2.],
                  [ 3.,  4.]]]])
        >>> input_3x3
        tensor([[[[ 1.,  2.,  0.],
                  [ 3.,  4.,  0.],
                  [ 0.,  0.,  0.]]]])

        >>> m = nn.Upsample(scale_factor=2, mode='bilinear')  # align_corners=False
        >>> # Notice that values in top left corner are the same with the small input (except at boundary)
        >>> m(input_3x3)
        tensor([[[[ 1.0000,  1.2500,  1.7500,  1.5000,  0.5000,  0.0000],
                  [ 1.5000,  1.7500,  2.2500,  1.8750,  0.6250,  0.0000],
                  [ 2.5000,  2.7500,  3.2500,  2.6250,  0.8750,  0.0000],
                  [ 2.2500,  2.4375,  2.8125,  2.2500,  0.7500,  0.0000],
                  [ 0.7500,  0.8125,  0.9375,  0.7500,  0.2500,  0.0000],
                  [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000]]]])

        >>> m = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
        >>> # Notice that values in top left corner are now changed
        >>> m(input_3x3)
        tensor([[[[ 1.0000,  1.4000,  1.8000,  1.6000,  0.8000,  0.0000],
                  [ 1.8000,  2.2000,  2.6000,  2.2400,  1.1200,  0.0000],
                  [ 2.6000,  3.0000,  3.4000,  2.8800,  1.4400,  0.0000],
                  [ 2.4000,  2.7200,  3.0400,  2.5600,  1.2800,  0.0000],
                  [ 1.2000,  1.3600,  1.5200,  1.2800,  0.6400,  0.0000],
                  [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000]]]])
    """
    __constants__ = ['size', 'scale_factor', 'mode', 'align_corners', 'name']

    def __init__(self, size=None, scale_factor=None, mode='nearest', align_corners=None):
        super(Upsample, self).__init__()
        self.name = type(self).__name__
        self.size = size
        if not scale_factor:
            # Falsy values (None, 0, empty tuple) mean "no scale factor given".
            self.scale_factor = None
        elif isinstance(scale_factor, tuple):
            # float() cannot be applied to a tuple directly; convert each
            # per-dimension multiplier so documented tuple inputs work.
            self.scale_factor = tuple(float(factor) for factor in scale_factor)
        else:
            self.scale_factor = float(scale_factor)
        self.mode = mode
        self.align_corners = align_corners

    @weak_script_method
    def forward(self, input):
        """Resize ``input`` by delegating to ``F.interpolate`` with the stored settings."""
        return F.interpolate(input, self.size, self.scale_factor, self.mode, self.align_corners)

    def extra_repr(self):
        """Return ``scale_factor`` (or ``size`` when no scale factor is set) and ``mode`` for ``repr``."""
        if self.scale_factor is not None:
            info = 'scale_factor=' + str(self.scale_factor)
        else:
            info = 'size=' + str(self.size)
        info += ', mode=' + self.mode
        return info
144 
145 
@weak_module
class UpsamplingNearest2d(Upsample):
    r"""Applies a 2D nearest neighbor upsampling to an input signal composed of several input
    channels.

    To specify the scale, it takes either the :attr:`size` or the :attr:`scale_factor`
    as its constructor argument.

    When :attr:`size` is given, it is the output size of the image `(h, w)`.

    Args:
        size (int or Tuple[int, int], optional): output spatial sizes
        scale_factor (float or Tuple[float, float], optional): multiplier for
            spatial size.

    .. warning::
        This class is deprecated in favor of :func:`~nn.functional.interpolate`.

    Shape:
        - Input: :math:`(N, C, H_{in}, W_{in})`
        - Output: :math:`(N, C, H_{out}, W_{out})` where

    .. math::
        H_{out} = \left\lfloor H_{in} \times \text{scale\_factor} \right\rfloor

    .. math::
        W_{out} = \left\lfloor W_{in} \times \text{scale\_factor} \right\rfloor

    Examples::

        >>> input = torch.arange(1, 5, dtype=torch.float32).view(1, 1, 2, 2)
        >>> input
        tensor([[[[ 1.,  2.],
                  [ 3.,  4.]]]])

        >>> m = nn.UpsamplingNearest2d(scale_factor=2)
        >>> m(input)
        tensor([[[[ 1.,  1.,  2.,  2.],
                  [ 1.,  1.,  2.,  2.],
                  [ 3.,  3.,  4.,  4.],
                  [ 3.,  3.,  4.,  4.]]]])
    """
    def __init__(self, size=None, scale_factor=None):
        # Fixes the mode to 'nearest'; size/scale_factor are forwarded unchanged.
        super(UpsamplingNearest2d, self).__init__(size, scale_factor, mode='nearest')
190 
191 
@weak_module
class UpsamplingBilinear2d(Upsample):
    r"""Applies a 2D bilinear upsampling to an input signal composed of several input
    channels.

    To specify the scale, it takes either the :attr:`size` or the :attr:`scale_factor`
    as its constructor argument.

    When :attr:`size` is given, it is the output size of the image `(h, w)`.

    Args:
        size (int or Tuple[int, int], optional): output spatial sizes
        scale_factor (float or Tuple[float, float], optional): multiplier for
            spatial size.

    .. warning::
        This class is deprecated in favor of :func:`~nn.functional.interpolate`. It is
        equivalent to ``nn.functional.interpolate(..., mode='bilinear', align_corners=True)``.

    Shape:
        - Input: :math:`(N, C, H_{in}, W_{in})`
        - Output: :math:`(N, C, H_{out}, W_{out})` where

    .. math::
        H_{out} = \left\lfloor H_{in} \times \text{scale\_factor} \right\rfloor

    .. math::
        W_{out} = \left\lfloor W_{in} \times \text{scale\_factor} \right\rfloor

    Examples::

        >>> input = torch.arange(1, 5, dtype=torch.float32).view(1, 1, 2, 2)
        >>> input
        tensor([[[[ 1.,  2.],
                  [ 3.,  4.]]]])

        >>> m = nn.UpsamplingBilinear2d(scale_factor=2)
        >>> m(input)
        tensor([[[[ 1.0000,  1.3333,  1.6667,  2.0000],
                  [ 1.6667,  2.0000,  2.3333,  2.6667],
                  [ 2.3333,  2.6667,  3.0000,  3.3333],
                  [ 3.0000,  3.3333,  3.6667,  4.0000]]]])
    """
    def __init__(self, size=None, scale_factor=None):
        # Fixes mode='bilinear' with align_corners=True; size/scale_factor are
        # forwarded unchanged.
        super(UpsamplingBilinear2d, self).__init__(size, scale_factor, mode='bilinear', align_corners=True)