Caffe2 - Python API
A deep learning, cross-platform ML framework
functional.py
1 r"""Functional interface"""
2 from __future__ import division
3 
4 import warnings
5 import math
6 import types
7 
8 import torch
9 from torch._C import _infer_size, _add_docstr
10 from . import _reduction as _Reduction
11 from . import _functions
12 from .modules import utils
13 from ._functions import vision
14 from .modules.utils import _single, _pair, _triple, _list_with_default
15 from . import grad
16 from . import _VF
17 from .._jit_internal import weak_script, List
18 
19 
20 conv1d = _add_docstr(torch.conv1d, r"""
21 conv1d(input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1, padding_mode='zeros') -> Tensor
22 
23 Applies a 1D convolution over an input signal composed of several input
24 planes.
25 
26 See :class:`~torch.nn.Conv1d` for details and output shape.
27 
28 .. include:: cudnn_deterministic.rst
29 
30 Args:
31  input: input tensor of shape :math:`(\text{minibatch} , \text{in\_channels} , iW)`
32  weight: filters of shape :math:`(\text{out\_channels} , \frac{\text{in\_channels}}{\text{groups}} , kW)`
33  bias: optional bias of shape :math:`(\text{out\_channels})`. Default: ``None``
34  stride: the stride of the convolving kernel. Can be a single number or
35  a one-element tuple `(sW,)`. Default: 1
36  padding: implicit paddings on both sides of the input. Can be a
37  single number or a one-element tuple `(padW,)`. Default: 0
38  dilation: the spacing between kernel elements. Can be a single number or
39  a one-element tuple `(dW,)`. Default: 1
40  groups: split input into groups, :math:`\text{in\_channels}` should be divisible by
41  the number of groups. Default: 1
42  padding_mode: the type of padding applied to both sides; can be `zeros` or `circular`. Default: `zeros`
43 
44 Examples::
45 
46  >>> filters = torch.randn(33, 16, 3)
47  >>> inputs = torch.randn(20, 16, 50)
48  >>> F.conv1d(inputs, filters)
49 """)
50 
51 conv2d = _add_docstr(torch.conv2d, r"""
52 conv2d(input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1, padding_mode='zeros') -> Tensor
53 
54 Applies a 2D convolution over an input image composed of several input
55 planes.
56 
57 See :class:`~torch.nn.Conv2d` for details and output shape.
58 
59 .. include:: cudnn_deterministic.rst
60 
61 Args:
62  input: input tensor of shape :math:`(\text{minibatch} , \text{in\_channels} , iH , iW)`
63  weight: filters of shape :math:`(\text{out\_channels} , \frac{\text{in\_channels}}{\text{groups}} , kH , kW)`
64  bias: optional bias tensor of shape :math:`(\text{out\_channels})`. Default: ``None``
65  stride: the stride of the convolving kernel. Can be a single number or a
66  tuple `(sH, sW)`. Default: 1
67  padding: implicit paddings on both sides of the input. Can be a
68  single number or a tuple `(padH, padW)`. Default: 0
69  dilation: the spacing between kernel elements. Can be a single number or
70  a tuple `(dH, dW)`. Default: 1
71  groups: split input into groups, :math:`\text{in\_channels}` should be divisible by the
72  number of groups. Default: 1
73  padding_mode: the type of padding applied to both sides; can be `zeros` or `circular`. Default: `zeros`
74 
75 Examples::
76 
77  >>> # With square kernels and equal stride
78  >>> filters = torch.randn(8,4,3,3)
79  >>> inputs = torch.randn(1,4,5,5)
80  >>> F.conv2d(inputs, filters, padding=1)
81 """) # noqa: E501
82 
83 conv3d = _add_docstr(torch.conv3d, r"""
84 conv3d(input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1, padding_mode='zeros') -> Tensor
85 
86 Applies a 3D convolution over an input image composed of several input
87 planes.
88 
89 See :class:`~torch.nn.Conv3d` for details and output shape.
90 
91 .. include:: cudnn_deterministic.rst
92 
93 Args:
94  input: input tensor of shape :math:`(\text{minibatch} , \text{in\_channels} , iT , iH , iW)`
95  weight: filters of shape :math:`(\text{out\_channels} , \frac{\text{in\_channels}}{\text{groups}} , kT , kH , kW)`
96  bias: optional bias tensor of shape :math:`(\text{out\_channels})`. Default: None
97  stride: the stride of the convolving kernel. Can be a single number or a
98  tuple `(sT, sH, sW)`. Default: 1
99  padding: implicit paddings on both sides of the input. Can be a
100  single number or a tuple `(padT, padH, padW)`. Default: 0
101  dilation: the spacing between kernel elements. Can be a single number or
102  a tuple `(dT, dH, dW)`. Default: 1
103  groups: split input into groups, :math:`\text{in\_channels}` should be divisible by
104  the number of groups. Default: 1
105  padding_mode: the type of padding applied to both sides; can be `zeros` or `circular`. Default: `zeros`
106 
107 Examples::
108 
109  >>> filters = torch.randn(33, 16, 3, 3, 3)
110  >>> inputs = torch.randn(20, 16, 50, 10, 20)
111  >>> F.conv3d(inputs, filters)
112 """) # noqa: E501
113 
114 conv_transpose1d = _add_docstr(torch.conv_transpose1d, r"""
115 conv_transpose1d(input, weight, bias=None, stride=1, padding=0, output_padding=0, groups=1, dilation=1) -> Tensor
116 
117 Applies a 1D transposed convolution operator over an input signal
118 composed of several input planes, sometimes also called "deconvolution".
119 
120 See :class:`~torch.nn.ConvTranspose1d` for details and output shape.
121 
122 .. include:: cudnn_deterministic.rst
123 
124 Args:
125  input: input tensor of shape :math:`(\text{minibatch} , \text{in\_channels} , iW)`
126  weight: filters of shape :math:`(\text{in\_channels} , \frac{\text{out\_channels}}{\text{groups}} , kW)`
127  bias: optional bias of shape :math:`(\text{out\_channels})`. Default: None
128  stride: the stride of the convolving kernel. Can be a single number or a
129  tuple ``(sW,)``. Default: 1
130  padding: ``dilation * (kernel_size - 1) - padding`` zero-padding will be added to both
131  sides of each dimension in the input. Can be a single number or a tuple
132  ``(padW,)``. Default: 0
133  output_padding: additional size added to one side of each dimension in the
134  output shape. Can be a single number or a tuple ``(out_padW,)``. Default: 0
135  groups: split input into groups, :math:`\text{in\_channels}` should be divisible by the
136  number of groups. Default: 1
137  dilation: the spacing between kernel elements. Can be a single number or
138  a tuple ``(dW,)``. Default: 1
139 
140 Examples::
141 
142  >>> inputs = torch.randn(20, 16, 50)
143  >>> weights = torch.randn(16, 33, 5)
144  >>> F.conv_transpose1d(inputs, weights)
145 """)
146 
147 conv_transpose2d = _add_docstr(torch.conv_transpose2d, r"""
148 conv_transpose2d(input, weight, bias=None, stride=1, padding=0, output_padding=0, groups=1, dilation=1) -> Tensor
149 
150 Applies a 2D transposed convolution operator over an input image
151 composed of several input planes, sometimes also called "deconvolution".
152 
153 See :class:`~torch.nn.ConvTranspose2d` for details and output shape.
154 
155 .. include:: cudnn_deterministic.rst
156 
157 Args:
158  input: input tensor of shape :math:`(\text{minibatch} , \text{in\_channels} , iH , iW)`
159  weight: filters of shape :math:`(\text{in\_channels} , \frac{\text{out\_channels}}{\text{groups}} , kH , kW)`
160  bias: optional bias of shape :math:`(\text{out\_channels})`. Default: None
161  stride: the stride of the convolving kernel. Can be a single number or a
162  tuple ``(sH, sW)``. Default: 1
163  padding: ``dilation * (kernel_size - 1) - padding`` zero-padding will be added to both
164  sides of each dimension in the input. Can be a single number or a tuple
165  ``(padH, padW)``. Default: 0
166  output_padding: additional size added to one side of each dimension in the
167  output shape. Can be a single number or a tuple ``(out_padH, out_padW)``.
168  Default: 0
169  groups: split input into groups, :math:`\text{in\_channels}` should be divisible by the
170  number of groups. Default: 1
171  dilation: the spacing between kernel elements. Can be a single number or
172  a tuple ``(dH, dW)``. Default: 1
173 
174 Examples::
175 
176  >>> # With square kernels and equal stride
177  >>> inputs = torch.randn(1, 4, 5, 5)
178  >>> weights = torch.randn(4, 8, 3, 3)
179  >>> F.conv_transpose2d(inputs, weights, padding=1)
180 """) # noqa: E501
181 
182 conv_transpose3d = _add_docstr(torch.conv_transpose3d, r"""
183 conv_transpose3d(input, weight, bias=None, stride=1, padding=0, output_padding=0, groups=1, dilation=1) -> Tensor
184 
185 Applies a 3D transposed convolution operator over an input image
186 composed of several input planes, sometimes also called "deconvolution".
187 
188 See :class:`~torch.nn.ConvTranspose3d` for details and output shape.
189 
190 .. include:: cudnn_deterministic.rst
191 
192 Args:
193  input: input tensor of shape :math:`(\text{minibatch} , \text{in\_channels} , iT , iH , iW)`
194  weight: filters of shape :math:`(\text{in\_channels} , \frac{\text{out\_channels}}{\text{groups}} , kT , kH , kW)`
195  bias: optional bias of shape :math:`(\text{out\_channels})`. Default: None
196  stride: the stride of the convolving kernel. Can be a single number or a
197  tuple ``(sT, sH, sW)``. Default: 1
198  padding: ``dilation * (kernel_size - 1) - padding`` zero-padding will be added to both
199  sides of each dimension in the input. Can be a single number or a tuple
200  ``(padT, padH, padW)``. Default: 0
201  output_padding: additional size added to one side of each dimension in the
202  output shape. Can be a single number or a tuple
203  ``(out_padT, out_padH, out_padW)``. Default: 0
204  groups: split input into groups, :math:`\text{in\_channels}` should be divisible by the
205  number of groups. Default: 1
206  dilation: the spacing between kernel elements. Can be a single number or
207  a tuple ``(dT, dH, dW)``. Default: 1
208 
209 Examples::
210 
211  >>> inputs = torch.randn(20, 16, 50, 10, 20)
212  >>> weights = torch.randn(16, 33, 3, 3, 3)
213  >>> F.conv_transpose3d(inputs, weights)
214 """) # noqa: E501
215 
216 conv_tbc = _add_docstr(torch.conv_tbc, r"""
217 Applies a 1-dimensional sequence convolution over an input sequence.
218 Input and output dimensions are (Time, Batch, Channels) - hence TBC.
219 
220 Args:
221  input: input tensor of shape :math:`(\text{sequence length} \times batch \times \text{in\_channels})`
222  weight: filter of shape (:math:`\text{kernel width} \times \text{in\_channels} \times \text{out\_channels}`)
223  bias: bias of shape (:math:`\text{out\_channels}`)
224  pad: number of timesteps to pad. Default: 0
225 """)
226 
227 
228 # Pooling
229 avg_pool1d = _add_docstr(torch.avg_pool1d, r"""
230 avg_pool1d(input, kernel_size, stride=None, padding=0, ceil_mode=False, count_include_pad=True) -> Tensor
231 
232 Applies a 1D average pooling over an input signal composed of several
233 input planes.
234 
235 See :class:`~torch.nn.AvgPool1d` for details and output shape.
236 
237 Args:
238  input: input tensor of shape :math:`(\text{minibatch} , \text{in\_channels} , iW)`
239  kernel_size: the size of the window. Can be a single number or a
240  tuple `(kW,)`
241  stride: the stride of the window. Can be a single number or a tuple
242  `(sW,)`. Default: :attr:`kernel_size`
243  padding: implicit zero paddings on both sides of the input. Can be a
244  single number or a tuple `(padW,)`. Default: 0
245  ceil_mode: when True, will use `ceil` instead of `floor` to compute the
246  output shape. Default: ``False``
247  count_include_pad: when True, will include the zero-padding in the
248  averaging calculation. Default: ``True``
249 
250 Examples::
251 
252  >>> # pool of square window of size=3, stride=2
253  >>> input = torch.tensor([[[1, 2, 3, 4, 5, 6, 7]]], dtype=torch.float32)
254  >>> F.avg_pool1d(input, kernel_size=3, stride=2)
255  tensor([[[ 2., 4., 6.]]])
256 
257 """)
258 
259 
260 avg_pool2d = _add_docstr(torch._C._nn.avg_pool2d, r"""
261 avg_pool2d(input, kernel_size, stride=None, padding=0, ceil_mode=False, count_include_pad=True) -> Tensor
262 
263 Applies 2D average-pooling operation in :math:`kH \times kW` regions by step size
264 :math:`sH \times sW` steps. The number of output features is equal to the number of
265 input planes.
266 
267 See :class:`~torch.nn.AvgPool2d` for details and output shape.
268 
269 Args:
270  input: input tensor :math:`(\text{minibatch} , \text{in\_channels} , iH , iW)`
271  kernel_size: size of the pooling region. Can be a single number or a
272  tuple `(kH, kW)`
273  stride: stride of the pooling operation. Can be a single number or a
274  tuple `(sH, sW)`. Default: :attr:`kernel_size`
275  padding: implicit zero paddings on both sides of the input. Can be a
276  single number or a tuple `(padH, padW)`. Default: 0
277  ceil_mode: when True, will use `ceil` instead of `floor` in the formula
278  to compute the output shape. Default: ``False``
279  count_include_pad: when True, will include the zero-padding in the
280  averaging calculation. Default: ``True``
281 """)
282 
283 avg_pool3d = _add_docstr(torch._C._nn.avg_pool3d, r"""
284 avg_pool3d(input, kernel_size, stride=None, padding=0, ceil_mode=False, count_include_pad=True) -> Tensor
285 
286 Applies 3D average-pooling operation in :math:`kT \times kH \times kW` regions by step
287 size :math:`sT \times sH \times sW` steps. The number of output features is equal to
288 :math:`\lfloor\frac{\text{input planes}}{sT}\rfloor`.
289 
290 See :class:`~torch.nn.AvgPool3d` for details and output shape.
291 
292 Args:
293  input: input tensor :math:`(\text{minibatch} , \text{in\_channels} , iT , iH , iW)`
294  kernel_size: size of the pooling region. Can be a single number or a
295  tuple `(kT, kH, kW)`
296  stride: stride of the pooling operation. Can be a single number or a
297  tuple `(sT, sH, sW)`. Default: :attr:`kernel_size`
298  padding: implicit zero paddings on both sides of the input. Can be a
299  single number or a tuple `(padT, padH, padW)`. Default: 0
300  ceil_mode: when True, will use `ceil` instead of `floor` in the formula
301  to compute the output shape. Default: ``False``
302  count_include_pad: when True, will include the zero-padding in the
303  averaging calculation. Default: ``True``
304 """)
305 
306 
@weak_script
def fractional_max_pool2d_with_indices(input, kernel_size, output_size=None,
                                       output_ratio=None, return_indices=False,
                                       _random_samples=None):
    # type: (Tensor, BroadcastingList2[int], Optional[BroadcastingList2[int]], Optional[BroadcastingList2[float]], bool, Optional[Tensor]) -> Tuple[Tensor, Tensor]  # noqa
    r"""Applies 2D fractional max pooling over an input signal composed of several input planes.

    Fractional MaxPooling is described in detail in the paper `Fractional MaxPooling`_ by Ben Graham.

    The max-pooling operation is applied in :math:`kH \times kW` regions by a stochastic
    step size determined by the target output size.
    The number of output features is equal to the number of input planes.

    Exactly how the output size is chosen: an explicit :attr:`output_size` wins;
    otherwise it is computed as ``int(input.size(d) * ratio)`` for dimensions 2 and 3.

    Args:
        kernel_size: the size of the window to take a max over.
                     Can be a single number :math:`k` (for a square kernel of :math:`k \times k`)
                     or a tuple `(kH, kW)`
        output_size: the target output size of the image of the form :math:`oH \times oW`.
                     Can be a tuple `(oH, oW)` or a single number :math:`oH` for a square image :math:`oH \times oH`
        output_ratio: If one wants to have an output size as a ratio of the input size, this option can be given.
                      This has to be a number or tuple in the range (0, 1)
        return_indices: if ``True``, will return the indices along with the outputs.
                        Useful to pass to :func:`~torch.nn.functional.max_unpool2d`.

    Raises:
        ValueError: if neither :attr:`output_size` nor :attr:`output_ratio` is given.

    Examples::

        >>> input = torch.randn(20, 16, 50, 32)
        >>> # pool of square window of size=3, and target output size 13x12
        >>> F.fractional_max_pool2d(input, 3, output_size=(13, 12))
        >>> # pool of square window and target output size being half of input image size
        >>> F.fractional_max_pool2d(input, 3, output_ratio=(0.5, 0.5))

    .. _Fractional MaxPooling:
        http://arxiv.org/abs/1412.6071
    """
    # NOTE: ``return_indices`` is not read here; this variant always returns
    # the full result tuple of the backend call.
    if output_size is None and output_ratio is None:
        raise ValueError("fractional_max_pool2d requires specifying either "
                         "an output_size or an output_ratio")

    if output_size is None:
        # Derive the target size from the ratio and the two trailing input dims.
        ratio = _pair(torch.jit._unwrap_optional(output_ratio))
        out_h = int(input.size(2) * ratio[0])
        out_w = int(input.size(3) * ratio[1])
        output_size = [out_h, out_w]

    if _random_samples is None:
        # One pair of uniform samples per (dim 0, dim 1) entry of the input.
        _random_samples = torch.rand(
            input.size(0), input.size(1), 2,
            dtype=input.dtype, device=input.device)

    pooled_and_indices = torch._C._nn.fractional_max_pool2d(
        input, kernel_size, output_size, _random_samples)
    return pooled_and_indices
352 
353 
# Tensor-only variant of fractional_max_pool2d: runs the tuple-returning
# implementation and keeps just the first element of its result.
# (Deliberately a comment rather than a docstring, so that only the
# ``*_with_indices`` function carries the public documentation.)
@weak_script
def _fractional_max_pool2d(input, kernel_size, output_size=None,
                           output_ratio=None, return_indices=False,
                           _random_samples=None):
    # type: (Tensor, BroadcastingList2[int], Optional[BroadcastingList2[int]], Optional[BroadcastingList2[float]], bool, Optional[Tensor]) -> Tensor  # noqa
    pooled_and_indices = fractional_max_pool2d_with_indices(
        input, kernel_size, output_size, output_ratio,
        return_indices, _random_samples)
    return pooled_and_indices[0]
362 
# Public ``fractional_max_pool2d``: built by ``boolean_dispatch`` from the two
# implementations above. ``fractional_max_pool2d_with_indices`` is registered
# for ``return_indices=True`` and ``_fractional_max_pool2d`` for
# ``return_indices=False`` (the default); ``return_indices`` is positional
# argument number 4 (0-based) in both signatures.
fractional_max_pool2d = torch._jit_internal.boolean_dispatch(
    func_name='fractional_max_pool2d',
    module_name=__name__,
    arg_name='return_indices',
    arg_index=4,
    default=False,
    if_true=fractional_max_pool2d_with_indices,
    if_false=_fractional_max_pool2d)
371 
372 
@weak_script
def fractional_max_pool3d_with_indices(input, kernel_size, output_size=None,
                                       output_ratio=None, return_indices=False,
                                       _random_samples=None):
    # type: (Tensor, BroadcastingList3[int], Optional[BroadcastingList3[int]], Optional[BroadcastingList3[float]], bool, Optional[Tensor]) -> Tuple[Tensor, Tensor]  # noqa
    r"""Applies 3D fractional max pooling over an input signal composed of several input planes.

    Fractional MaxPooling is described in detail in the paper `Fractional MaxPooling`_ by Ben Graham.

    The max-pooling operation is applied in :math:`kT \times kH \times kW` regions by a stochastic
    step size determined by the target output size.
    The number of output features is equal to the number of input planes.

    Exactly how the output size is chosen: an explicit :attr:`output_size` wins;
    otherwise it is computed as ``int(input.size(d) * ratio)`` for dimensions 2, 3 and 4.

    Args:
        kernel_size: the size of the window to take a max over.
                     Can be a single number :math:`k` (for a square kernel of :math:`k \times k \times k`)
                     or a tuple `(kT, kH, kW)`
        output_size: the target output size of the form :math:`oT \times oH \times oW`.
                     Can be a tuple `(oT, oH, oW)` or a single number :math:`oH` for a cubic output
                     :math:`oH \times oH \times oH`
        output_ratio: If one wants to have an output size as a ratio of the input size, this option can be given.
                      This has to be a number or tuple in the range (0, 1)
        return_indices: if ``True``, will return the indices along with the outputs.
                        Useful to pass to :func:`~torch.nn.functional.max_unpool3d`.

    Raises:
        ValueError: if neither :attr:`output_size` nor :attr:`output_ratio` is given.

    Examples::

        >>> input = torch.randn(20, 16, 50, 32, 16)
        >>> # pool of cubic window of size=3, and target output size 13x12x11
        >>> F.fractional_max_pool3d(input, 3, output_size=(13, 12, 11))
        >>> # pool of cubic window and target output size being half of input size
        >>> F.fractional_max_pool3d(input, 3, output_ratio=(0.5, 0.5, 0.5))

    .. _Fractional MaxPooling:
        http://arxiv.org/abs/1412.6071
    """
    # NOTE: ``return_indices`` is not read here; this variant always returns
    # the full result tuple of the backend call.
    if output_size is None and output_ratio is None:
        raise ValueError("fractional_max_pool3d requires specifying either "
                         "an output_size or an output_ratio")

    if output_size is None:
        # Derive the target size from the ratio and the three trailing input dims.
        ratio = _triple(torch.jit._unwrap_optional(output_ratio))
        out_t = int(input.size(2) * ratio[0])
        out_h = int(input.size(3) * ratio[1])
        out_w = int(input.size(4) * ratio[2])
        output_size = [out_t, out_h, out_w]

    if _random_samples is None:
        # One triple of uniform samples per (dim 0, dim 1) entry of the input.
        _random_samples = torch.rand(
            input.size(0), input.size(1), 3,
            dtype=input.dtype, device=input.device)

    pooled_and_indices = torch._C._nn.fractional_max_pool3d(
        input, kernel_size, output_size, _random_samples)
    return pooled_and_indices
420 
421 
# Tensor-only variant of fractional_max_pool3d: runs the tuple-returning
# implementation and keeps just the first element of its result.
# (Deliberately a comment rather than a docstring, so that only the
# ``*_with_indices`` function carries the public documentation.)
@weak_script
def _fractional_max_pool3d(input, kernel_size, output_size=None,
                           output_ratio=None, return_indices=False,
                           _random_samples=None):
    # type: (Tensor, BroadcastingList3[int], Optional[BroadcastingList3[int]], Optional[BroadcastingList3[float]], bool, Optional[Tensor]) -> Tensor  # noqa
    pooled_and_indices = fractional_max_pool3d_with_indices(
        input, kernel_size, output_size, output_ratio,
        return_indices, _random_samples)
    return pooled_and_indices[0]
430 
# Public ``fractional_max_pool3d``: built by ``boolean_dispatch`` from the two
# implementations above. ``fractional_max_pool3d_with_indices`` is registered
# for ``return_indices=True`` and ``_fractional_max_pool3d`` for
# ``return_indices=False`` (the default); ``return_indices`` is positional
# argument number 4 (0-based) in both signatures.
fractional_max_pool3d = torch._jit_internal.boolean_dispatch(
    func_name='fractional_max_pool3d',
    module_name=__name__,
    arg_name='return_indices',
    arg_index=4,
    default=False,
    if_true=fractional_max_pool3d_with_indices,
    if_false=_fractional_max_pool3d)
439 
440 
@weak_script
def max_pool1d_with_indices(input, kernel_size, stride=None, padding=0,
                            dilation=1, ceil_mode=False, return_indices=False):
    # type: (Tensor, BroadcastingList1[int], Optional[BroadcastingList1[int]], BroadcastingList1[int], BroadcastingList1[int], bool, bool) -> Tuple[Tensor, Tensor]  # noqa
    r"""Applies a 1D max pooling over an input signal composed of several input
    planes.

    See :class:`~torch.nn.MaxPool1d` for details, including the meaning of
    :attr:`kernel_size`, :attr:`stride`, :attr:`padding`, :attr:`dilation`
    and :attr:`ceil_mode`.
    """
    # ``return_indices`` is not read here: this variant always returns the
    # tuple produced by the backend call.
    if stride is None:
        # No stride given: forward an empty (typed) list to the backend.
        stride = torch.jit.annotate(List[int], [])
    pooled_and_indices = torch.max_pool1d_with_indices(
        input, kernel_size, stride, padding, dilation, ceil_mode)
    return pooled_and_indices
454 
455 
# Tensor-only variant of max_pool1d: delegates to max_pool1d_with_indices and
# keeps only the first element of the returned tuple. Left without a
# docstring on purpose so the ``*_with_indices`` docstring is the only one.
@weak_script
def _max_pool1d(input, kernel_size, stride=None, padding=0, dilation=1,
                ceil_mode=False, return_indices=False):
    # type: (Tensor, BroadcastingList1[int], Optional[BroadcastingList1[int]], BroadcastingList1[int], BroadcastingList1[int], bool, bool) -> Tensor  # noqa
    pooled_and_indices = max_pool1d_with_indices(
        input, kernel_size, stride, padding, dilation, ceil_mode)
    return pooled_and_indices[0]
462 
# Public ``max_pool1d``: ``max_pool1d_with_indices`` is used when
# ``return_indices`` is true and ``_max_pool1d`` otherwise (the default).
# ``return_indices`` is positional argument number 6 (0-based) of both.
max_pool1d = torch._jit_internal.boolean_dispatch(
    arg_name='return_indices',
    arg_index=6,
    default=False,
    if_true=max_pool1d_with_indices,
    if_false=_max_pool1d,
    module_name=__name__,
    func_name='max_pool1d')
471 
472 
@weak_script
def max_pool2d_with_indices(input, kernel_size, stride=None, padding=0, dilation=1,
                            ceil_mode=False, return_indices=False):
    # type: (Tensor, BroadcastingList2[int], Optional[BroadcastingList2[int]], BroadcastingList2[int], BroadcastingList2[int], bool, bool) -> Tuple[Tensor, Tensor]  # noqa
    r"""Applies a 2D max pooling over an input signal composed of several input
    planes.

    See :class:`~torch.nn.MaxPool2d` for details, including the meaning of
    :attr:`kernel_size`, :attr:`stride`, :attr:`padding`, :attr:`dilation`
    and :attr:`ceil_mode`.
    """
    # ``return_indices`` is not read here: this variant always returns the
    # tuple produced by the backend call.
    if stride is None:
        # No stride given: forward an empty (typed) list to the backend.
        stride = torch.jit.annotate(List[int], [])
    pooled_and_indices = torch._C._nn.max_pool2d_with_indices(
        input, kernel_size, stride, padding, dilation, ceil_mode)
    return pooled_and_indices
485 
486 
# Tensor-only variant of max_pool2d: delegates to max_pool2d_with_indices and
# keeps only the first element of the returned tuple. Left without a
# docstring on purpose so the ``*_with_indices`` docstring is the only one.
@weak_script
def _max_pool2d(input, kernel_size, stride=None, padding=0, dilation=1,
                ceil_mode=False, return_indices=False):
    # type: (Tensor, BroadcastingList2[int], Optional[BroadcastingList2[int]], BroadcastingList2[int], BroadcastingList2[int], bool, bool) -> Tensor  # noqa
    pooled_and_indices = max_pool2d_with_indices(
        input, kernel_size, stride, padding, dilation, ceil_mode)
    return pooled_and_indices[0]
493 
# Public ``max_pool2d``: ``max_pool2d_with_indices`` is used when
# ``return_indices`` is true and ``_max_pool2d`` otherwise (the default).
# ``return_indices`` is positional argument number 6 (0-based) of both.
max_pool2d = torch._jit_internal.boolean_dispatch(
    arg_name='return_indices',
    arg_index=6,
    default=False,
    if_true=max_pool2d_with_indices,
    if_false=_max_pool2d,
    module_name=__name__,
    func_name='max_pool2d')
502 
503 
@weak_script
def max_pool3d_with_indices(input, kernel_size, stride=None, padding=0,
                            dilation=1, ceil_mode=False, return_indices=False):
    # type: (Tensor, BroadcastingList3[int], Optional[BroadcastingList3[int]], BroadcastingList3[int], BroadcastingList3[int], bool, bool) -> Tuple[Tensor, Tensor]  # noqa
    r"""Applies a 3D max pooling over an input signal composed of several input
    planes.

    See :class:`~torch.nn.MaxPool3d` for details, including the meaning of
    :attr:`kernel_size`, :attr:`stride`, :attr:`padding`, :attr:`dilation`
    and :attr:`ceil_mode`.
    """
    # ``return_indices`` is not read here: this variant always returns the
    # tuple produced by the backend call.
    if stride is None:
        # No stride given: forward an empty (typed) list to the backend.
        stride = torch.jit.annotate(List[int], [])
    pooled_and_indices = torch._C._nn.max_pool3d_with_indices(
        input, kernel_size, stride, padding, dilation, ceil_mode)
    return pooled_and_indices
517 
518 
# Tensor-only variant of max_pool3d: delegates to max_pool3d_with_indices and
# keeps only the first element of the returned tuple. Left without a
# docstring on purpose so the ``*_with_indices`` docstring is the only one.
@weak_script
def _max_pool3d(input, kernel_size, stride=None, padding=0, dilation=1,
                ceil_mode=False, return_indices=False):
    # type: (Tensor, BroadcastingList3[int], Optional[BroadcastingList3[int]], BroadcastingList3[int], BroadcastingList3[int], bool, bool) -> Tensor  # noqa
    pooled_and_indices = max_pool3d_with_indices(
        input, kernel_size, stride, padding, dilation, ceil_mode)
    return pooled_and_indices[0]
525 
# Public ``max_pool3d``: ``max_pool3d_with_indices`` is used when
# ``return_indices`` is true and ``_max_pool3d`` otherwise (the default).
# ``return_indices`` is positional argument number 6 (0-based) of both.
max_pool3d = torch._jit_internal.boolean_dispatch(
    arg_name='return_indices',
    arg_index=6,
    default=False,
    if_true=max_pool3d_with_indices,
    if_false=_max_pool3d,
    module_name=__name__,
    func_name='max_pool3d')
534 
535 
@weak_script
def _unpool_output_size(input, kernel_size, stride, padding, output_size):
    # type: (Tensor, List[int], List[int], List[int], Optional[List[int]]) -> List[int]
    r"""Resolves the per-dimension output size used by the max-unpool functions.

    For every pooled dimension ``d`` (input dimension ``d + 2``) a default
    size is computed as
    ``(input.size(d + 2) - 1) * stride[d] + kernel_size[d] - 2 * padding[d]``.

    Args:
        input: tensor whose dimensions from index 2 onwards are the pooled ones
        kernel_size: one entry per pooled dimension
        stride: one entry per pooled dimension
        padding: one entry per pooled dimension
        output_size: ``None`` to use the default size, or an explicit size with
            either ``len(kernel_size)`` entries or ``len(kernel_size) + 2``
            entries (in which case the two leading entries are dropped)

    Returns:
        The default size when :attr:`output_size` is ``None``; otherwise the
        validated (and possibly trimmed) :attr:`output_size`.

    Raises:
        ValueError: if :attr:`output_size` has an unsupported length, or if any
            entry is not strictly between ``default - stride`` and
            ``default + stride`` for its dimension.
    """
    in_shape = input.size()
    n_dims = len(kernel_size)

    # Size implied by the pooling parameters alone.
    inferred = torch.jit.annotate(List[int], [])
    for dim in range(n_dims):
        inferred.append((in_shape[dim + 2] - 1) * stride[dim] +
                        kernel_size[dim] - 2 * padding[dim])

    # A single exit point is kept (no early return) for the script compiler.
    if output_size is None:
        ret = inferred
    else:
        if len(output_size) == n_dims + 2:
            # Full-shape form: drop the two leading (non-pooled) entries.
            output_size = output_size[2:]
        if len(output_size) != n_dims:
            raise ValueError("output_size should be a sequence containing "
                             "{} or {} elements, but it has a length of '{}'"
                             .format(n_dims, n_dims + 2,
                                     len(output_size)))
        for dim in range(n_dims):
            lower = inferred[dim] - stride[dim]
            upper = inferred[dim] + stride[dim]
            # Both bounds are exclusive.
            if not (lower < output_size[dim] < upper):
                raise ValueError(
                    'invalid output_size "{}" (dim {} must be between {} and {})'
                    .format(output_size, dim, lower, upper))

        ret = output_size
    return ret
564 
565 
@weak_script
def max_unpool1d(input, indices, kernel_size, stride=None, padding=0,
                 output_size=None):
    # type: (Tensor, Tensor, BroadcastingList1[int], Optional[BroadcastingList1[int]], BroadcastingList1[int], Optional[BroadcastingList1[int]]) -> Tensor  # noqa
    r"""Computes a partial inverse of :class:`MaxPool1d`.

    See :class:`~torch.nn.MaxUnpool1d` for details.

    Args:
        input: the pooled tensor to unpool
        indices: the indices tensor forwarded to the unpooling kernel
        kernel_size: size of the pooling window
        stride: stride of the pooling window; :attr:`kernel_size` is used
            when this is ``None``
        padding: padding that was applied. Default: 0
        output_size: optional explicit output size, validated by
            ``_unpool_output_size``
    """
    kernel_size = _single(kernel_size)
    if stride is not None:
        eff_stride = _single(stride)
    else:
        eff_stride = kernel_size
    padding = _single(padding)
    output_size = _unpool_output_size(input, kernel_size, eff_stride, padding,
                                      output_size)

    # The 1D case is run through the 2D kernel: append a trailing size-1
    # dimension to the target size and to both tensors, then remove it again.
    if isinstance(output_size, list):
        output_size = output_size + [1]
    else:
        output_size = output_size + (1,)
    input_4d = input.unsqueeze(3)
    indices_4d = indices.unsqueeze(3)
    unpooled = torch._C._nn.max_unpool2d(input_4d, indices_4d, output_size)
    return unpooled.squeeze(3)
588 
589 
@weak_script
def max_unpool2d(input, indices, kernel_size, stride=None, padding=0,
                 output_size=None):
    # type: (Tensor, Tensor, BroadcastingList2[int], Optional[BroadcastingList2[int]], BroadcastingList2[int], Optional[BroadcastingList2[int]]) -> Tensor  # noqa
    r"""Computes a partial inverse of :class:`MaxPool2d`.

    See :class:`~torch.nn.MaxUnpool2d` for details.

    Args:
        input: the pooled tensor to unpool
        indices: the indices tensor forwarded to the unpooling kernel
        kernel_size: size of the pooling window
        stride: stride of the pooling window; :attr:`kernel_size` is used
            when this is ``None``
        padding: padding that was applied. Default: 0
        output_size: optional explicit output size, validated by
            ``_unpool_output_size``
    """
    kernel_size = _pair(kernel_size)
    if stride is not None:
        eff_stride = _pair(stride)
    else:
        eff_stride = kernel_size
    padding = _pair(padding)
    # Stride and padding are only needed to resolve the target size; the 2D
    # kernel itself receives just the input, the indices and that size.
    output_size = _unpool_output_size(input, kernel_size, eff_stride, padding,
                                      output_size)
    unpooled = torch._C._nn.max_unpool2d(input, indices, output_size)
    return unpooled
607 
608 
@weak_script
def max_unpool3d(input, indices, kernel_size, stride=None, padding=0,
                 output_size=None):
    # type: (Tensor, Tensor, BroadcastingList3[int], Optional[BroadcastingList3[int]], BroadcastingList3[int], Optional[BroadcastingList3[int]]) -> Tensor  # noqa
    r"""Computes a partial inverse of :class:`MaxPool3d`.

    See :class:`~torch.nn.MaxUnpool3d` for details.

    Args:
        input: the pooled tensor to unpool
        indices: the indices tensor forwarded to the unpooling kernel
        kernel_size: size of the pooling window
        stride: stride of the pooling window; :attr:`kernel_size` is used
            when this is ``None``
        padding: padding that was applied. Default: 0
        output_size: optional explicit output size, validated by
            ``_unpool_output_size``
    """
    kernel_size = _triple(kernel_size)
    if stride is not None:
        eff_stride = _triple(stride)
    else:
        eff_stride = kernel_size
    padding = _triple(padding)
    output_size = _unpool_output_size(input, kernel_size, eff_stride, padding,
                                      output_size)
    # Unlike the 2D kernel, the 3D kernel also takes the stride and padding.
    unpooled = torch._C._nn.max_unpool3d(
        input, indices, output_size, eff_stride, padding)
    return unpooled
627 
628 
@weak_script
def lp_pool2d(input, norm_type, kernel_size, stride=None, ceil_mode=False):
    # type: (Tensor, float, int, Optional[BroadcastingList2[int]], bool) -> Tensor
    r"""Applies a 2D power-average pooling over an input signal composed of
    several input planes. If the sum of all inputs to the power of `p` is
    zero, the gradient is set to zero as well.

    See :class:`~torch.nn.LPPool2d` for details.

    Args:
        input: the input tensor
        norm_type: the exponent `p`
        kernel_size: size of the pooling window
        stride: stride forwarded to ``avg_pool2d``; when ``None`` the
            ``avg_pool2d`` default is used
        ceil_mode: forwarded to ``avg_pool2d``. Default: ``False``
    """
    kh, kw = utils._pair(kernel_size)
    powered = input.pow(norm_type)
    if stride is not None:
        out = avg_pool2d(powered, kernel_size, stride, 0, ceil_mode)
    else:
        out = avg_pool2d(powered, kernel_size, padding=0, ceil_mode=ceil_mode)

    # ``out`` is the window mean of x**p; scaling by the window area turns it
    # into the window sum before the 1/p power is taken.
    signed_magnitude = torch.sign(out) * relu(torch.abs(out))
    return signed_magnitude.mul(kh * kw).pow(1. / norm_type)
645 
646 
@weak_script
def lp_pool1d(input, norm_type, kernel_size, stride=None, ceil_mode=False):
    # type: (Tensor, float, int, Optional[BroadcastingList1[int]], bool) -> Tensor
    r"""Applies a 1D power-average pooling over an input signal composed of
    several input planes. If the sum of all inputs to the power of `p` is
    zero, the gradient is set to zero as well.

    See :class:`~torch.nn.LPPool1d` for details.

    Args:
        input: the input tensor
        norm_type: the exponent `p`
        kernel_size: size of the pooling window (a single integer)
        stride: stride forwarded to ``avg_pool1d``; when ``None`` the
            ``avg_pool1d`` default is used
        ceil_mode: forwarded to ``avg_pool1d``. Default: ``False``
    """
    powered = input.pow(norm_type)
    if stride is not None:
        out = avg_pool1d(powered, kernel_size, stride, 0, ceil_mode)
    else:
        out = avg_pool1d(powered, kernel_size, padding=0, ceil_mode=ceil_mode)

    # ``out`` is the window mean of x**p; scaling by the window length turns
    # it into the window sum before the 1/p power is taken.
    signed_magnitude = torch.sign(out) * relu(torch.abs(out))
    return signed_magnitude.mul(kernel_size).pow(1. / norm_type)
662 
663 
@weak_script
def adaptive_max_pool1d_with_indices(input, output_size, return_indices=False):
    # type: (Tensor, BroadcastingList1[int], bool) -> Tuple[Tensor, Tensor]
    r"""Applies a 1D adaptive max pooling over an input signal composed of
    several input planes.

    See :class:`~torch.nn.AdaptiveMaxPool1d` for details and output shape.

    Args:
        output_size: the target output size (single integer)
        return_indices: whether to return pooling indices. Default: ``False``
    """
    # ``return_indices`` is not read here: this variant always returns the
    # tuple produced by the backend call.
    pooled_and_indices = torch.adaptive_max_pool1d(input, output_size)
    return pooled_and_indices
677 
678 
# Tensor-only variant of adaptive_max_pool1d: keeps the first element of the
# tuple returned by adaptive_max_pool1d_with_indices. Left without a docstring
# on purpose so the ``*_with_indices`` docstring is the only one.
@weak_script
def _adaptive_max_pool1d(input, output_size, return_indices=False):
    # type: (Tensor, BroadcastingList1[int], bool) -> Tensor
    pooled_and_indices = adaptive_max_pool1d_with_indices(input, output_size)
    return pooled_and_indices[0]
683 
# Public ``adaptive_max_pool1d``: ``adaptive_max_pool1d_with_indices`` is used
# when ``return_indices`` is true and ``_adaptive_max_pool1d`` otherwise (the
# default). ``return_indices`` is positional argument number 2 (0-based).
adaptive_max_pool1d = torch._jit_internal.boolean_dispatch(
    func_name='adaptive_max_pool1d',
    module_name=__name__,
    arg_name='return_indices',
    arg_index=2,
    default=False,
    if_true=adaptive_max_pool1d_with_indices,
    if_false=_adaptive_max_pool1d)
692 
693 
@weak_script
def adaptive_max_pool2d_with_indices(input, output_size, return_indices=False):
    # type: (Tensor, BroadcastingList2[int], bool) -> Tuple[Tensor, Tensor]
    r"""Applies a 2D adaptive max pooling over an input signal composed of
    several input planes.

    See :class:`~torch.nn.AdaptiveMaxPool2d` for details and output shape.

    Args:
        output_size: the target output size (single integer or
            double-integer tuple)
        return_indices: whether to return pooling indices. Default: ``False``
    """
    # The size annotation is BroadcastingList2 (not 1) so that a single
    # integer expands to two entries when this function is compiled, matching
    # the documented "single integer or double-integer tuple" contract.
    # ``return_indices`` is not read here: the backend tuple is always returned.
    output_size = _list_with_default(output_size, input.size())
    return torch._C._nn.adaptive_max_pool2d(input, output_size)
709 
710 
# Tensor-only variant of adaptive_max_pool2d: keeps the first element of the
# tuple returned by adaptive_max_pool2d_with_indices. The size annotation is
# BroadcastingList2 so a single integer expands to two entries when compiled.
# Left without a docstring on purpose so the ``*_with_indices`` docstring is
# the only one.
@weak_script
def _adaptive_max_pool2d(input, output_size, return_indices=False):
    # type: (Tensor, BroadcastingList2[int], bool) -> Tensor
    return adaptive_max_pool2d_with_indices(input, output_size)[0]
715 
# Public ``adaptive_max_pool2d``: ``adaptive_max_pool2d_with_indices`` is used
# when ``return_indices`` is true and ``_adaptive_max_pool2d`` otherwise (the
# default). ``return_indices`` is positional argument number 2 (0-based).
adaptive_max_pool2d = torch._jit_internal.boolean_dispatch(
    func_name='adaptive_max_pool2d',
    module_name=__name__,
    arg_name='return_indices',
    arg_index=2,
    default=False,
    if_true=adaptive_max_pool2d_with_indices,
    if_false=_adaptive_max_pool2d)
724 
725 
@weak_script
def adaptive_max_pool3d_with_indices(input, output_size, return_indices=False):
    # type: (Tensor, BroadcastingList3[int], bool) -> Tuple[Tensor, Tensor]
    r"""Applies a 3D adaptive max pooling over an input signal composed of
    several input planes.

    See :class:`~torch.nn.AdaptiveMaxPool3d` for details and output shape.

    Args:
        input: the input tensor
        output_size: the target output size (single integer or
            triple-integer tuple)
        return_indices: whether to return pooling indices. Default: ``False``.
            The flag is not read by this function, which always returns
            the indices.

    Returns:
        A tuple ``(output, indices)``.
    """
    # The script signature declares BroadcastingList3 (as adaptive_avg_pool3d
    # does) so that a bare integer expands to one entry per pooled dimension
    # instead of a single-element list.
    output_size = _list_with_default(output_size, input.size())
    return torch._C._nn.adaptive_max_pool3d(input, output_size)
741 
742 
@weak_script
def _adaptive_max_pool3d(input, output_size, return_indices=False):
    # type: (Tensor, BroadcastingList3[int], bool) -> Tensor
    # Values-only variant: delegate to the index-returning implementation and
    # keep the first tuple element. The signature uses BroadcastingList3 (as
    # adaptive_avg_pool3d does) so a bare integer expands to three entries.
    # NOTE(review): documented with comments instead of a docstring because
    # this is one half of a boolean_dispatch pair -- confirm the dispatcher
    # tolerates docstrings on both halves before adding one here.
    return adaptive_max_pool3d_with_indices(input, output_size)[0]
747 
# Public entry point. The implementation is chosen from the value of the
# ``return_indices`` argument (positional index 2, default ``False``).
adaptive_max_pool3d = torch._jit_internal.boolean_dispatch(
    module_name=__name__,
    func_name='adaptive_max_pool3d',
    arg_name='return_indices',
    arg_index=2,
    default=False,
    if_true=adaptive_max_pool3d_with_indices,
    if_false=_adaptive_max_pool3d)
756 
757 
# ``torch.adaptive_avg_pool1d`` is re-exported under this module-level name;
# the text below is handed to ``_add_docstr`` as its documentation.
adaptive_avg_pool1d = _add_docstr(torch.adaptive_avg_pool1d, r"""
adaptive_avg_pool1d(input, output_size) -> Tensor

Applies a 1D adaptive average pooling over an input signal composed of
several input planes.

See :class:`~torch.nn.AdaptiveAvgPool1d` for details and output shape.

Args:
    output_size: the target output size (single integer)
""")
769 
770 
@weak_script
def adaptive_avg_pool2d(input, output_size):
    # type: (Tensor, BroadcastingList2[int]) -> Tensor
    r"""
    Applies a 2D adaptive average pooling over an input signal composed of
    several input planes.

    See :class:`~torch.nn.AdaptiveAvgPool2d` for details and output shape.

    Args:
        input: the input tensor
        output_size: the target output size (single integer or
            double-integer tuple)
    """
    # Resolve the requested size against the input's own size before
    # handing it to the native kernel.
    resolved_size = _list_with_default(output_size, input.size())
    pooled = torch._C._nn.adaptive_avg_pool2d(input, resolved_size)
    return pooled
786 
787 
@weak_script
def adaptive_avg_pool3d(input, output_size):
    # type: (Tensor, BroadcastingList3[int]) -> Tensor
    r"""
    Applies a 3D adaptive average pooling over an input signal composed of
    several input planes.

    See :class:`~torch.nn.AdaptiveAvgPool3d` for details and output shape.

    Args:
        input: the input tensor
        output_size: the target output size (single integer or
            triple-integer tuple)
    """
    # Resolve the requested size against the input's own size before
    # handing it to the native kernel.
    resolved_size = _list_with_default(output_size, input.size())
    pooled = torch._C._nn.adaptive_avg_pool3d(input, resolved_size)
    return pooled
803 
804 
805 # Activation functions
@weak_script
def dropout(input, p=0.5, training=True, inplace=False):
    # type: (Tensor, float, bool, bool) -> Tensor
    r"""
    During training, randomly zeroes some of the elements of the input
    tensor with probability :attr:`p` using samples from a Bernoulli
    distribution.

    See :class:`~torch.nn.Dropout` for details.

    Args:
        input: the input tensor
        p: probability of an element to be zeroed. Default: 0.5
        training: apply dropout if is ``True``. Default: ``True``
        inplace: If set to ``True``, will do this operation in-place. Default: ``False``

    Raises:
        ValueError: if :attr:`p` is outside ``[0, 1]``.
    """
    if p > 1. or p < 0.:
        raise ValueError("dropout probability has to be between 0 and 1, but got {}".format(p))
    if inplace:
        result = _VF.dropout_(input, p, training)
    else:
        result = _VF.dropout(input, p, training)
    return result
827 
828 
@weak_script
def alpha_dropout(input, p=0.5, training=False, inplace=False):
    # type: (Tensor, float, bool, bool) -> Tensor
    r"""Applies alpha dropout to the input.

    See :class:`~torch.nn.AlphaDropout` for details.

    Args:
        input: the input tensor
        p: dropout probability. Default: 0.5
        training: forwarded to the underlying op. Default: ``False``
        inplace: If set to ``True``, the in-place op is used. Default: ``False``

    Raises:
        ValueError: if :attr:`p` is outside ``[0, 1]``.
    """
    if p > 1. or p < 0.:
        raise ValueError("dropout probability has to be between 0 and 1, but got {}".format(p))
    if inplace:
        result = _VF.alpha_dropout_(input, p, training)
    else:
        result = _VF.alpha_dropout(input, p, training)
    return result
842 
843 
@weak_script
def dropout2d(input, p=0.5, training=True, inplace=False):
    # type: (Tensor, float, bool, bool) -> Tensor
    r"""
    Randomly zero out entire channels of the input tensor (a channel is a
    2D feature map, e.g., the :math:`j`-th channel of the :math:`i`-th sample
    in the batched input is a 2D tensor :math:`\text{input}[i, j]`).
    Each channel will be zeroed out independently on every forward call with
    probability :attr:`p` using samples from a Bernoulli distribution.

    See :class:`~torch.nn.Dropout2d` for details.

    Args:
        input: the input tensor
        p: probability of a channel to be zeroed. Default: 0.5
        training: apply dropout if is ``True``. Default: ``True``
        inplace: If set to ``True``, will do this operation in-place. Default: ``False``

    Raises:
        ValueError: if :attr:`p` is outside ``[0, 1]``.
    """
    if p > 1. or p < 0.:
        raise ValueError("dropout probability has to be between 0 and 1, but got {}".format(p))
    if inplace:
        result = _VF.feature_dropout_(input, p, training)
    else:
        result = _VF.feature_dropout(input, p, training)
    return result
867 
868 
@weak_script
def dropout3d(input, p=0.5, training=True, inplace=False):
    # type: (Tensor, float, bool, bool) -> Tensor
    r"""
    Randomly zero out entire channels of the input tensor (a channel is a
    3D feature map, e.g., the :math:`j`-th channel of the :math:`i`-th sample
    in the batched input is a 3D tensor :math:`\text{input}[i, j]`).
    Each channel will be zeroed out independently on every forward call with
    probability :attr:`p` using samples from a Bernoulli distribution.

    See :class:`~torch.nn.Dropout3d` for details.

    Args:
        input: the input tensor
        p: probability of a channel to be zeroed. Default: 0.5
        training: apply dropout if is ``True``. Default: ``True``
        inplace: If set to ``True``, will do this operation in-place. Default: ``False``

    Raises:
        ValueError: if :attr:`p` is outside ``[0, 1]``.
    """
    # Deliberately a copy of dropout2d rather than a call into it, so that
    # stack traces name the function the user actually invoked.
    if p > 1. or p < 0.:
        raise ValueError("dropout probability has to be between 0 and 1, but got {}".format(p))
    if inplace:
        result = _VF.feature_dropout_(input, p, training)
    else:
        result = _VF.feature_dropout(input, p, training)
    return result
894 
895 
@weak_script
def feature_alpha_dropout(input, p=0.5, training=False, inplace=False):
    # type: (Tensor, float, bool, bool) -> Tensor
    r"""Validates :attr:`p` and applies the feature alpha dropout op to the input.

    Args:
        input: the input tensor
        p: dropout probability. Default: 0.5
        training: forwarded to the underlying op. Default: ``False``
        inplace: If set to ``True``, the in-place op is used. Default: ``False``

    Raises:
        ValueError: if :attr:`p` is outside ``[0, 1]``.
    """
    if p > 1. or p < 0.:
        raise ValueError("dropout probability has to be between 0 and 1, but got {}".format(p))
    if inplace:
        result = _VF.feature_alpha_dropout_(input, p, training)
    else:
        result = _VF.feature_alpha_dropout(input, p, training)
    return result
905 
906 
@weak_script
def threshold(input, threshold, value, inplace=False):
    # type: (Tensor, float, float, bool) -> Tensor
    r"""Thresholds each element of the input Tensor.

    See :class:`~torch.nn.Threshold` for more details.

    Args:
        input: the input tensor
        threshold: the threshold forwarded to the underlying op
        value: the replacement value forwarded to the underlying op
        inplace: If set to ``True``, the in-place op is used. Default: ``False``
    """
    return (_VF.threshold_(input, threshold, value)
            if inplace
            else _VF.threshold(input, threshold, value))
919 
920 
# ``_VF.threshold_`` is re-exported under this module-level name; the text
# below is handed to ``_add_docstr`` as its documentation.
threshold_ = _add_docstr(_VF.threshold_, r"""
threshold_(input, threshold, value) -> Tensor

In-place version of :func:`~threshold`.
""")
926 
927 
@weak_script
def relu(input, inplace=False):
    # type: (Tensor, bool) -> Tensor
    r"""relu(input, inplace=False) -> Tensor

    Applies the rectified linear unit function element-wise. See
    :class:`~torch.nn.ReLU` for more details.

    Args:
        input: the input tensor
        inplace: If set to ``True``, ``torch.relu_`` is used. Default: ``False``
    """
    return torch.relu_(input) if inplace else torch.relu(input)
941 
942 
# ``torch.relu_`` is re-exported under this module-level name; the text
# below is handed to ``_add_docstr`` as its documentation.
relu_ = _add_docstr(torch.relu_, r"""
relu_(input) -> Tensor

In-place version of :func:`~relu`.
""")
948 
949 
@weak_script
def glu(input, dim=-1):
    # type: (Tensor, int) -> Tensor
    r"""
    glu(input, dim=-1) -> Tensor

    The gated linear unit. Computes:

    .. math ::
        \text{GLU}(a, b) = a \otimes \sigma(b)

    where `input` is split in half along `dim` to form `a` and `b`, :math:`\sigma`
    is the sigmoid function and :math:`\otimes` is the element-wise product between matrices.

    See `Language Modeling with Gated Convolutional Networks <https://arxiv.org/abs/1612.08083>`_.

    Args:
        input (Tensor): input tensor
        dim (int): dimension on which to split the input. Default: -1

    Raises:
        RuntimeError: if :attr:`input` is a 0-dimensional tensor.
    """
    # A 0-dim tensor has no dimension to halve, so reject it up front with a
    # readable message instead of deferring to the native op.
    if input.dim() == 0:
        raise RuntimeError("glu does not support scalars because halving size must be even")
    return torch._C._nn.glu(input, dim)
973 
974 
@weak_script
def hardtanh(input, min_val=-1., max_val=1., inplace=False):
    # type: (Tensor, float, float, bool) -> Tensor
    r"""
    hardtanh(input, min_val=-1., max_val=1., inplace=False) -> Tensor

    Applies the HardTanh function element-wise. See :class:`~torch.nn.Hardtanh` for more
    details.

    Args:
        input: the input tensor
        min_val: lower bound forwarded to the underlying op. Default: -1
        max_val: upper bound forwarded to the underlying op. Default: 1
        inplace: If set to ``True``, the in-place op is used. Default: ``False``
    """
    return (torch._C._nn.hardtanh_(input, min_val, max_val)
            if inplace
            else torch._C._nn.hardtanh(input, min_val, max_val))
989 
990 
# ``torch._C._nn.hardtanh_`` is re-exported under this module-level name; the
# text below is handed to ``_add_docstr`` as its documentation.
hardtanh_ = _add_docstr(torch._C._nn.hardtanh_, r"""
hardtanh_(input, min_val=-1., max_val=1.) -> Tensor

In-place version of :func:`~hardtanh`.
""")
996 
997 
@weak_script
def relu6(input, inplace=False):
    # type: (Tensor, bool) -> Tensor
    r"""relu6(input, inplace=False) -> Tensor

    Applies the element-wise function :math:`\text{ReLU6}(x) = \min(\max(0,x), 6)`.

    See :class:`~torch.nn.ReLU6` for more details.

    Args:
        input: the input tensor
        inplace: forwarded to :func:`hardtanh`. Default: ``False``
    """
    # ReLU6 is hardtanh with the clamp range fixed to [0, 6].
    lower_bound = 0.
    upper_bound = 6.
    return hardtanh(input, lower_bound, upper_bound, inplace)
1008 
1009 
@weak_script
def elu(input, alpha=1., inplace=False):
    # type: (Tensor, float, bool) -> Tensor
    r"""Applies element-wise,
    :math:`\text{ELU}(x) = \max(0,x) + \min(0, \alpha * (\exp(x) - 1))`.

    See :class:`~torch.nn.ELU` for more details.

    Args:
        input: the input tensor
        alpha: the :math:`\alpha` value of the formula. Default: 1
        inplace: If set to ``True``, the in-place op is used. Default: ``False``
    """
    return (torch._C._nn.elu_(input, alpha)
            if inplace
            else torch._C._nn.elu(input, alpha))
1023 
1024 
# ``torch._C._nn.elu_`` is re-exported under this module-level name; the text
# below is handed to ``_add_docstr`` as its documentation.
elu_ = _add_docstr(torch._C._nn.elu_, r"""
elu_(input, alpha=1.) -> Tensor

In-place version of :func:`~elu`.
""")
1030 
1031 
@weak_script
def selu(input, inplace=False):
    # type: (Tensor, bool) -> Tensor
    r"""selu(input, inplace=False) -> Tensor

    Applies element-wise,
    :math:`\text{SELU}(x) = scale * (\max(0,x) + \min(0, \alpha * (\exp(x) - 1)))`,
    with :math:`\alpha=1.6732632423543772848170429916717` and
    :math:`scale=1.0507009873554804934193349852946`.

    See :class:`~torch.nn.SELU` for more details.

    Args:
        input: the input tensor
        inplace: If set to ``True``, ``torch.selu_`` is used. Default: ``False``
    """
    return torch.selu_(input) if inplace else torch.selu(input)
1049 
1050 
# ``torch.selu_`` is re-exported under this module-level name; the text
# below is handed to ``_add_docstr`` as its documentation.
selu_ = _add_docstr(torch.selu_, r"""
selu_(input) -> Tensor

In-place version of :func:`~selu`.
""")
1056 
1057 
@weak_script
def celu(input, alpha=1., inplace=False):
    # type: (Tensor, float, bool) -> Tensor
    r"""celu(input, alpha=1., inplace=False) -> Tensor

    Applies element-wise,
    :math:`\text{CELU}(x) = \max(0,x) + \min(0, \alpha * (\exp(x/\alpha) - 1))`.

    See :class:`~torch.nn.CELU` for more details.

    Args:
        input: the input tensor
        alpha: the :math:`\alpha` value of the formula. Default: 1
        inplace: If set to ``True``, ``torch.celu_`` is used. Default: ``False``
    """
    return torch.celu_(input, alpha) if inplace else torch.celu(input, alpha)
1073 
# ``torch.celu_`` is re-exported under this module-level name; the text
# below is handed to ``_add_docstr`` as its documentation.
celu_ = _add_docstr(torch.celu_, r"""
celu_(input, alpha=1.) -> Tensor

In-place version of :func:`~celu`.
""")
1079 
1080 
@weak_script
def leaky_relu(input, negative_slope=0.01, inplace=False):
    # type: (Tensor, float, bool) -> Tensor
    r"""
    leaky_relu(input, negative_slope=0.01, inplace=False) -> Tensor

    Applies element-wise,
    :math:`\text{LeakyReLU}(x) = \max(0, x) + \text{negative\_slope} * \min(0, x)`

    See :class:`~torch.nn.LeakyReLU` for more details.

    Args:
        input: the input tensor
        negative_slope: the slope of the formula above. Default: 0.01
        inplace: If set to ``True``, the in-place op is used. Default: ``False``
    """
    return (torch._C._nn.leaky_relu_(input, negative_slope)
            if inplace
            else torch._C._nn.leaky_relu(input, negative_slope))
1097 
1098 
# ``torch._C._nn.leaky_relu_`` is re-exported under this module-level name;
# the text below is handed to ``_add_docstr`` as its documentation.
leaky_relu_ = _add_docstr(torch._C._nn.leaky_relu_, r"""
leaky_relu_(input, negative_slope=0.01) -> Tensor

In-place version of :func:`~leaky_relu`.
""")
1104 
1105 
@weak_script
def prelu(input, weight):
    # type: (Tensor, Tensor) -> Tensor
    r"""prelu(input, weight) -> Tensor

    Applies element-wise the function
    :math:`\text{PReLU}(x) = \max(0,x) + \text{weight} * \min(0,x)` where weight is a
    learnable parameter.

    See :class:`~torch.nn.PReLU` for more details.

    Args:
        input: the input tensor
        weight: the weight tensor of the formula above
    """
    activated = torch.prelu(input, weight)
    return activated
1118 
1119 
@weak_script
def rrelu(input, lower=1. / 8, upper=1. / 3, training=False, inplace=False):
    # type: (Tensor, float, float, bool, bool) -> Tensor
    r"""rrelu(input, lower=1./8, upper=1./3, training=False, inplace=False) -> Tensor

    Randomized leaky ReLU.

    See :class:`~torch.nn.RReLU` for more details.

    Args:
        input: the input tensor
        lower: forwarded to the underlying op. Default: 1/8
        upper: forwarded to the underlying op. Default: 1/3
        training: forwarded to the underlying op. Default: ``False``
        inplace: If set to ``True``, ``torch.rrelu_`` is used. Default: ``False``
    """
    return (torch.rrelu_(input, lower, upper, training)
            if inplace
            else torch.rrelu(input, lower, upper, training))
1134 
1135 
# ``torch.rrelu_`` is re-exported under this module-level name; the text
# below is handed to ``_add_docstr`` as its documentation.
rrelu_ = _add_docstr(torch.rrelu_, r"""
rrelu_(input, lower=1./8, upper=1./3, training=False) -> Tensor

In-place version of :func:`~rrelu`.
""")
1141 
# ``torch._C._nn.log_sigmoid`` is re-exported here under the name
# ``logsigmoid``; the text below is handed to ``_add_docstr`` as its
# documentation.
logsigmoid = _add_docstr(torch._C._nn.log_sigmoid, r"""
logsigmoid(input) -> Tensor

Applies element-wise :math:`\text{LogSigmoid}(x_i) = \log \left(\frac{1}{1 + \exp(-x_i)}\right)`

See :class:`~torch.nn.LogSigmoid` for more details.
""")
1149 
1150 
@weak_script
def hardshrink(input, lambd=0.5):
    # type: (Tensor, float) -> Tensor
    r"""
    hardshrink(input, lambd=0.5) -> Tensor

    Applies the hard shrinkage function element-wise

    See :class:`~torch.nn.Hardshrink` for more details.

    Args:
        input: the input tensor
        lambd: the lambda value forwarded to the underlying op. Default: 0.5
    """
    shrunk = torch.hardshrink(input, lambd)
    return shrunk
1162 
1163 
@weak_script
def tanhshrink(input):
    r"""tanhshrink(input) -> Tensor

    Applies element-wise, :math:`\text{Tanhshrink}(x) = x - \text{Tanh}(x)`

    See :class:`~torch.nn.Tanhshrink` for more details.

    Args:
        input: the input tensor
    """
    squashed = input.tanh()
    return input - squashed
1173 
1174 
@weak_script
def softsign(input):
    r"""softsign(input) -> Tensor

    Applies element-wise, the function :math:`\text{SoftSign}(x) = \frac{x}{1 + |x|}`

    See :class:`~torch.nn.Softsign` for more details.

    Args:
        input: the input tensor
    """
    denominator = input.abs() + 1
    return input / denominator
1184 
1185 
# ``torch._C._nn.softplus`` is re-exported under this module-level name; the
# text below is handed to ``_add_docstr`` as its documentation.
softplus = _add_docstr(torch._C._nn.softplus, r"""
softplus(input, beta=1, threshold=20) -> Tensor

Applies the softplus function element-wise.

See :class:`~torch.nn.Softplus` for more details.
""")
1189 
1190 
@weak_script
def _get_softmax_dim(name, ndim, stacklevel):
    # type: (str, int, int) -> int
    r"""Warns about an implicit softmax dimension and returns the legacy default.

    Args:
        name: name of the calling function, interpolated into the warning
        ndim: number of dimensions of the tensor being processed
        stacklevel: forwarded to :func:`warnings.warn`

    Returns:
        ``0`` when :attr:`ndim` is 0, 1 or 3, otherwise ``1``.
    """
    warnings.warn("Implicit dimension choice for {} has been deprecated. "
                  "Change the call to include dim=X as an argument.".format(name), stacklevel=stacklevel)
    legacy_dim = 0 if (ndim == 0 or ndim == 1 or ndim == 3) else 1
    return legacy_dim
1201 
1202 
@weak_script
def softmin(input, dim=None, _stacklevel=3, dtype=None):
    # type: (Tensor, Optional[int], int, Optional[int]) -> Tensor
    r"""Applies a softmin function.

    Note that :math:`\text{Softmin}(x) = \text{Softmax}(-x)`. See softmax definition for mathematical formula.

    See :class:`~torch.nn.Softmin` for more details.

    Arguments:
        input (Tensor): input
        dim (int): A dimension along which softmin will be computed (so every slice
            along dim will sum to 1). When omitted, a deprecated implicit
            choice is made and a warning is issued.
        dtype (:class:`torch.dtype`, optional): the desired data type of returned tensor.
            If specified, the input tensor is cast to :attr:`dtype` before the operation
            is performed. This is useful for preventing data type overflows. Default: None.
    """
    if dim is None:
        dim = _get_softmax_dim('softmin', input.dim(), _stacklevel)
    # Softmin is softmax of the negated input.
    negated = -input
    if dtype is not None:
        ret = negated.softmax(dim, dtype=dtype)
    else:
        ret = negated.softmax(dim)
    return ret
1227 
1228 
@weak_script
def softmax(input, dim=None, _stacklevel=3, dtype=None):
    # type: (Tensor, Optional[int], int, Optional[int]) -> Tensor
    r"""Applies a softmax function.

    Softmax is defined as:

    :math:`\text{Softmax}(x_{i}) = \frac{\exp(x_i)}{\sum_j \exp(x_j)}`

    It is applied to all slices along dim, and will re-scale them so that the elements
    lie in the range `[0, 1]` and sum to 1.

    See :class:`~torch.nn.Softmax` for more details.

    Arguments:
        input (Tensor): input
        dim (int): A dimension along which softmax will be computed. When
            omitted, a deprecated implicit choice is made and a warning is issued.
        dtype (:class:`torch.dtype`, optional): the desired data type of returned tensor.
            If specified, the input tensor is cast to :attr:`dtype` before the operation
            is performed. This is useful for preventing data type overflows. Default: None.

    .. note::
        This function doesn't work directly with NLLLoss,
        which expects the Log to be computed between the Softmax and itself.
        Use log_softmax instead (it's faster and has better numerical properties).

    """
    if dim is None:
        dim = _get_softmax_dim('softmax', input.dim(), _stacklevel)
    if dtype is not None:
        ret = input.softmax(dim, dtype=dtype)
    else:
        ret = input.softmax(dim)
    return ret
1263 
1264 
@weak_script
def gumbel_softmax(logits, tau=1, hard=False, eps=1e-10, dim=-1):
    # type: (Tensor, float, bool, float, int) -> Tensor
    r"""
    Samples from the `Gumbel-Softmax distribution`_ and optionally discretizes.

    Args:
      logits: `[..., num_features]` unnormalized log probabilities
      tau: non-negative scalar temperature
      hard: if ``True``, the returned samples will be discretized as one-hot vectors,
            but will be differentiated as if it is the soft sample in autograd
      eps: deprecated and without effect; a warning is issued when a value
            other than ``1e-10`` is passed
      dim (int): A dimension along which softmax will be computed. Default: -1.

    Returns:
      Sampled tensor of same shape as `logits` from the Gumbel-Softmax distribution.
      If ``hard=True``, the returned samples will be one-hot, otherwise they will
      be probability distributions that sum to 1 across `dim`.

    .. note::
      This function is here for legacy reasons, may be removed from nn.Functional in the future.

    .. note::
      The main trick for `hard` is to do  `y_hard - y_soft.detach() + y_soft`

      It achieves two things:
      - makes the output value exactly one-hot
      (since the y_soft value is subtracted and then added back)
      - makes the gradient equal to y_soft gradient
      (since we strip all other gradients)

    Examples::
        >>> logits = torch.randn(20, 32)
        >>> # Sample soft categorical using reparametrization trick:
        >>> F.gumbel_softmax(logits, tau=1, hard=False)
        >>> # Sample hard categorical using "Straight-through" trick:
        >>> F.gumbel_softmax(logits, tau=1, hard=True)

    .. _Gumbel-Softmax distribution:
        https://arxiv.org/abs/1611.00712
        https://arxiv.org/abs/1611.01144
    """

    if eps != 1e-10:
        warnings.warn("`eps` parameter is deprecated and has no effect.")

    # Gumbel(0, 1) noise, obtained as the negated log of exponential samples.
    exponential_samples = torch.empty_like(logits).exponential_()
    gumbel_noise = -exponential_samples.log()
    # Perturb the logits with the noise and apply the temperature.
    perturbed = (logits + gumbel_noise) / tau
    y_soft = perturbed.softmax(dim)

    if not hard:
        # Reparametrization trick: hand back the soft sample unchanged.
        ret = y_soft
    else:
        # Straight-through: one-hot value in the forward pass, y_soft's
        # gradient in the backward pass.
        index = y_soft.max(dim, keepdim=True)[1]
        y_hard = torch.zeros_like(logits).scatter_(dim, index, 1.0)
        ret = y_hard - y_soft.detach() + y_soft
    return ret
1323 
1324 
@weak_script
def log_softmax(input, dim=None, _stacklevel=3, dtype=None):
    # type: (Tensor, Optional[int], int, Optional[int]) -> Tensor
    r"""Applies a softmax followed by a logarithm.

    While mathematically equivalent to log(softmax(x)), doing these two
    operations separately is slower, and numerically unstable. This function
    uses an alternative formulation to compute the output and gradient correctly.

    See :class:`~torch.nn.LogSoftmax` for more details.

    Arguments:
        input (Tensor): input
        dim (int): A dimension along which log_softmax will be computed. When
            omitted, a deprecated implicit choice is made and a warning is issued.
        dtype (:class:`torch.dtype`, optional): the desired data type of returned tensor.
            If specified, the input tensor is cast to :attr:`dtype` before the operation
            is performed. This is useful for preventing data type overflows. Default: None.
    """
    if dim is None:
        dim = _get_softmax_dim('log_softmax', input.dim(), _stacklevel)
    if dtype is not None:
        ret = input.log_softmax(dim, dtype=dtype)
    else:
        ret = input.log_softmax(dim)
    return ret
1350 
1351 
# ``torch._C._nn.softshrink`` is re-exported under this module-level name; the
# text below is handed to ``_add_docstr`` as its documentation.
softshrink = _add_docstr(torch._C._nn.softshrink, r"""
softshrink(input, lambd=0.5) -> Tensor

Applies the soft shrinkage function elementwise

See :class:`~torch.nn.Softshrink` for more details.
""")
1359 
1360 
@weak_script
def tanh(input):
    r"""tanh(input) -> Tensor

    Applies element-wise,
    :math:`\text{Tanh}(x) = \tanh(x) = \frac{\exp(x) - \exp(-x)}{\exp(x) + \exp(-x)}`

    See :class:`~torch.nn.Tanh` for more details.

    .. note::
        Deprecated: every call emits a warning pointing to ``torch.tanh``.
    """
    warnings.warn("nn.functional.tanh is deprecated. Use torch.tanh instead.")
    result = input.tanh()
    return result
1372 
1373 
@weak_script
def sigmoid(input):
    r"""sigmoid(input) -> Tensor

    Applies the element-wise function :math:`\text{Sigmoid}(x) = \frac{1}{1 + \exp(-x)}`

    See :class:`~torch.nn.Sigmoid` for more details.

    .. note::
        Deprecated: every call emits a warning pointing to ``torch.sigmoid``.
    """
    warnings.warn("nn.functional.sigmoid is deprecated. Use torch.sigmoid instead.")
    result = input.sigmoid()
    return result
1384 
1385 
@weak_script
def linear(input, weight, bias=None):
    # type: (Tensor, Tensor, Optional[Tensor]) -> Tensor
    r"""
    Applies a linear transformation to the incoming data: :math:`y = xA^T + b`.

    Args:
        input: the data :math:`x`
        weight: the matrix :math:`A`; it is transposed before multiplying
        bias: optional :math:`b`, added to the product when given

    Shape:

        - Input: :math:`(N, *, in\_features)` where `*` means any number of
          additional dimensions
        - Weight: :math:`(out\_features, in\_features)`
        - Bias: :math:`(out\_features)`
        - Output: :math:`(N, *, out\_features)`
    """
    if input.dim() == 2 and bias is not None:
        # 2-D input with a bias: the fused addmm call is marginally faster.
        ret = torch.addmm(bias, input, weight.t())
    else:
        product = input.matmul(weight.t())
        if bias is not None:
            # The product is a fresh tensor, so adding in place is safe.
            product += bias
        ret = product
    return ret
1409 
1410 
@weak_script
def bilinear(input1, input2, weight, bias=None):
    # type: (Tensor, Tensor, Tensor, Optional[Tensor]) -> Tensor
    r"""Forwards all four arguments, unchanged, to :func:`torch.bilinear`.

    Args:
        input1: first input tensor
        input2: second input tensor
        weight: weight tensor
        bias: optional bias tensor. Default: ``None``
    """
    result = torch.bilinear(input1, input2, weight, bias)
    return result
1415 
1416 
1417 def _no_grad_embedding_renorm_(weight, input, max_norm, norm_type):
1418  # type: (Tensor, Tensor, float, float) -> Tensor
1419  with torch.no_grad():
1420  return torch.embedding_renorm_(weight, input, max_norm, norm_type)
1421 
1422 
@weak_script
def embedding(input, weight, padding_idx=None, max_norm=None, norm_type=2.,
              scale_grad_by_freq=False, sparse=False):
    # type: (Tensor, Tensor, Optional[int], Optional[float], float, bool, bool) -> Tensor
    r"""A simple lookup table that looks up embeddings in a fixed dictionary and size.

    This module is often used to retrieve word embeddings using indices.
    The input to the module is a list of indices, and the embedding matrix,
    and the output is the corresponding word embeddings.

    See :class:`torch.nn.Embedding` for more details.

    Args:
        input (LongTensor): Tensor containing indices into the embedding matrix
        weight (Tensor): The embedding matrix with number of rows equal to the maximum possible index + 1,
            and number of columns equal to the embedding size
        padding_idx (int, optional): If given, pads the output with the embedding vector at :attr:`padding_idx`
                                         (initialized to zeros) whenever it encounters the index.
                                         A negative value counts from the end of :attr:`weight`.
        max_norm (float, optional): If given, each embedding vector with norm larger than :attr:`max_norm`
                                    is renormalized to have norm :attr:`max_norm`.
                                    Note: this will modify :attr:`weight` in-place.
        norm_type (float, optional): The p of the p-norm to compute for the :attr:`max_norm` option. Default ``2``.
        scale_grad_by_freq (boolean, optional): If given, this will scale gradients by the inverse of frequency of
                                                the words in the mini-batch. Default ``False``.
        sparse (bool, optional): If ``True``, gradient w.r.t. :attr:`weight` will be a sparse tensor. See Notes under
                                 :class:`torch.nn.Embedding` for more details regarding sparse gradients.

    Shape:
        - Input: LongTensor of arbitrary shape containing the indices to extract
        - Weight: Embedding matrix of floating point type with shape `(V, embedding_dim)`,
                            where V = maximum index + 1 and embedding_dim = the embedding size
        - Output: `(*, embedding_dim)`, where `*` is the input shape

    Examples::

        >>> # a batch of 2 samples of 4 indices each
        >>> input = torch.tensor([[1,2,4,5],[4,3,2,9]])
        >>> # an embedding matrix containing 10 tensors of size 3
        >>> embedding_matrix = torch.rand(10, 3)
        >>> F.embedding(input, embedding_matrix)
        tensor([[[ 0.8490,  0.9625,  0.6753],
                 [ 0.9666,  0.7761,  0.6108],
                 [ 0.6246,  0.9751,  0.3618],
                 [ 0.4161,  0.2419,  0.7383]],

                [[ 0.6246,  0.9751,  0.3618],
                 [ 0.0237,  0.7794,  0.0528],
                 [ 0.9666,  0.7761,  0.6108],
                 [ 0.3385,  0.8612,  0.1867]]])

        >>> # example with padding_idx
        >>> weights = torch.rand(10, 3)
        >>> weights[0, :].zero_()
        >>> embedding_matrix = weights
        >>> input = torch.tensor([[0,2,0,5]])
        >>> F.embedding(input, embedding_matrix, padding_idx=0)
        tensor([[[ 0.0000,  0.0000,  0.0000],
                 [ 0.5609,  0.5384,  0.8720],
                 [ 0.0000,  0.0000,  0.0000],
                 [ 0.6262,  0.2438,  0.7471]]])
    """
    if padding_idx is None:
        # -1 is what the native op receives when no padding index was given.
        padding_idx = -1
    else:
        if padding_idx < 0:
            assert padding_idx >= -weight.size(0), 'Padding_idx must be within num_embeddings'
            # Translate a from-the-end index into an absolute row number.
            padding_idx = weight.size(0) + padding_idx
        elif padding_idx > 0:
            assert padding_idx < weight.size(0), 'Padding_idx must be within num_embeddings'
    if max_norm is not None:
        # `embedding_renorm_` makes its index argument contiguous anyway, so
        # do it once here and let the lookup below benefit from the improved
        # locality as well.
        input = input.contiguous()
        # XXX: stands in for
        #   with torch.no_grad():
        #       torch.embedding_renorm_
        # remove once script supports set_grad_enabled
        _no_grad_embedding_renorm_(weight, input, max_norm, norm_type)
    return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
1503 
1504 
@weak_script
def embedding_bag(input, weight, offsets=None, max_norm=None, norm_type=2,
                  scale_grad_by_freq=False, mode='mean', sparse=False):
    # type: (Tensor, Tensor, Optional[Tensor], Optional[float], float, bool, str, bool) -> Tensor
    r"""Computes sums, means or maxes of `bags` of embeddings, without instantiating the
    intermediate embeddings.

    See :class:`torch.nn.EmbeddingBag` for more details.

    .. include:: cuda_deterministic_backward.rst

    Args:
        input (LongTensor): Tensor containing bags of indices into the embedding matrix
        weight (Tensor): The embedding matrix with number of rows equal to the maximum possible index + 1,
            and number of columns equal to the embedding size
        offsets (LongTensor, optional): Only used when :attr:`input` is 1D. :attr:`offsets` determines
                             the starting index position of each bag (sequence) in :attr:`input`.
        max_norm (float, optional): If given, each embedding vector with norm larger than :attr:`max_norm`
                                    is renormalized to have norm :attr:`max_norm`.
                                    Note: this will modify :attr:`weight` in-place.
        norm_type (float, optional): The ``p`` in the ``p``-norm to compute for the :attr:`max_norm` option.
                                     Default ``2``.
        scale_grad_by_freq (boolean, optional): if given, this will scale gradients by the inverse of frequency of
                                                the words in the mini-batch. Default ``False``.
                                                Note: this option is not supported when ``mode="max"``.
        mode (string, optional): ``"sum"``, ``"mean"`` or ``"max"``. Specifies the way to reduce the bag.
                                 Default: ``"mean"``
        sparse (bool, optional): if ``True``, gradient w.r.t. :attr:`weight` will be a sparse tensor. See Notes under
                                 :class:`torch.nn.Embedding` for more details regarding sparse gradients.
                                 Note: this option is not supported when ``mode="max"``.

    Raises:
        ValueError: if :attr:`input` is neither 1D nor 2D; if :attr:`offsets`
            is given for a 2D input; if :attr:`offsets` is missing, not 1D,
            does not start at 0 or points past the end of a 1D input; if
            :attr:`mode` is not one of the supported names; or if
            ``mode="max"`` is combined with :attr:`scale_grad_by_freq` or
            :attr:`sparse`.

    Shape:

        - :attr:`input` (LongTensor) and :attr:`offsets` (LongTensor, optional)

          - If :attr:`input` is 2D of shape `(B, N)`,

            it will be treated as ``B`` bags (sequences) each of fixed length ``N``, and
            this will return ``B`` values aggregated in a way depending on the :attr:`mode`.
            :attr:`offsets` is ignored and required to be ``None`` in this case.

          - If :attr:`input` is 1D of shape `(N)`,

            it will be treated as a concatenation of multiple bags (sequences).
            :attr:`offsets` is required to be a 1D tensor containing the
            starting index positions of each bag in :attr:`input`. Therefore,
            for :attr:`offsets` of shape `(B)`, :attr:`input` will be viewed as
            having ``B`` bags. Empty bags (i.e., having 0-length) will have
            returned vectors filled by zeros.

        - :attr:`weight` (Tensor): the learnable weights of the module of
          shape `(num_embeddings, embedding_dim)`

        - :attr:`output`: aggregated embedding values of shape `(B, embedding_dim)`

    Examples::

        >>> # an Embedding module containing 10 tensors of size 3
        >>> embedding_matrix = torch.rand(10, 3)
        >>> # a batch of 2 samples of 4 indices each
        >>> input = torch.tensor([1,2,4,5,4,3,2,9])
        >>> offsets = torch.tensor([0,4])
        >>> F.embedding_bag(input, embedding_matrix, offsets)
        tensor([[ 0.3397,  0.3552,  0.5545],
                [ 0.5893,  0.4386,  0.5882]])
    """
    # Check for backward compatibility.
    # Used to be embedding_bag(weight, input, ...)
    # Now is     embedding_bag(input, weight, ...)
    # A long `weight` paired with a floating-point `input` means the caller
    # used the old order, so warn and swap the two.
    if weight.dtype == torch.long and input.is_floating_point():
        warnings.warn("Argument order of nn.functional.embedding_bag was changed. "
                      "Usage `embedding_bag(weight, input, ...)` is deprecated, "
                      "and should now be `embedding_bag(input, weight, ...)`.")
        weight, input = input, weight

    if input.dim() == 2:
        if offsets is not None:
            raise ValueError("if input is 2D, then offsets has to be None"
                             ", as input is treated as a mini-batch of"
                             " fixed length sequences. However, found "
                             "offsets of type {}".format(type(offsets)))
        else:
            # Fixed-length bags: bag i starts at i * N in the flattened input.
            offsets = torch.arange(0, input.numel(), input.size(1),
                                   dtype=torch.long, device=input.device)

            input = input.reshape(-1)
    elif input.dim() == 1:
        if offsets is None:
            raise ValueError("offsets has to be a 1D Tensor but got None")
        offsets = torch.jit._unwrap_optional(offsets)
        if offsets.dim() != 1:
            raise ValueError("offsets has to be a 1D Tensor")
        if int(offsets[0]) != 0:
            raise ValueError("offsets[0] has to be 0, i.e., the first sequence "
                             "in the mini-batch has to start from position 0. "
                             "However, got {}".format(offsets[0].item()))
        if int(offsets[-1]) > input.size(0):
            raise ValueError("offsets[-1] can not be greater than input's length"
                             " ({}), but got offsets[-1] of {}"
                             .format(input.size(0), offsets[-1].item()))
    else:
        raise ValueError("input has to be 1D or 2D Tensor,"
                         " but got Tensor of dimension {}".format(input.dim()))
    offsets = torch.jit._unwrap_optional(offsets)  # TODO remove when exception control flow logic
    # Translate the mode name into the integer code passed to the native op.
    if mode == 'sum':
        mode_enum = 0
    elif mode == 'mean':
        mode_enum = 1
    elif mode == 'max':
        mode_enum = 2

        if scale_grad_by_freq:
            raise ValueError("max mode does not support scaling the gradient by the frequency")

        if sparse:
            raise ValueError("max mode does not support sparse weights")

    else:
        mode_enum = -1  # TODO when exception control flow logic
        raise ValueError("mode has to be one of sum, mean or max")

    if max_norm is not None:
        # XXX: equivalent to
        # with torch.no_grad():
        #   torch.embedding_renorm_
        # remove once script supports set_grad_enabled
        _no_grad_embedding_renorm_(weight, input, max_norm, norm_type)

    # The native op returns four values; only the first is handed back.
    ret, _, _, _ = torch.embedding_bag(
        weight,
        input,
        offsets,
        scale_grad_by_freq,
        mode_enum,
        sparse)
    return ret
1641 
1642 
@weak_script
def batch_norm(input, running_mean, running_var, weight=None, bias=None,
               training=False, momentum=0.1, eps=1e-5):
    # type: (Tensor, Optional[Tensor], Optional[Tensor], Optional[Tensor], Optional[Tensor], bool, float, float) -> Tensor  # noqa
    r"""Normalizes every channel of :attr:`input` across the batch.

    See :class:`~torch.nn.BatchNorm1d`, :class:`~torch.nn.BatchNorm2d`,
    :class:`~torch.nn.BatchNorm3d` for details.

    Raises:
        ValueError: when :attr:`training` is ``True`` and the batch size
            multiplied by every dimension after the channel dimension is 1.
    """
    if training:
        shape = input.size()
        # Values per channel = shape[0] * shape[2] * shape[3] * ...
        # Written as an explicit loop because JIT script cannot yet express
        # ``reduce(mul, shape[2:], shape[0])``.
        values_per_channel = shape[0]
        for offset in range(len(shape) - 2):
            values_per_channel *= shape[offset + 2]
        if values_per_channel == 1:
            raise ValueError('Expected more than 1 value per channel when training, got input size {}'.format(shape))

    cudnn_enabled = torch.backends.cudnn.enabled
    return torch.batch_norm(input, weight, bias, running_mean, running_var,
                            training, momentum, eps, cudnn_enabled)
1673 
1674 
@weak_script
def instance_norm(input, running_mean=None, running_var=None, weight=None,
                  bias=None, use_input_stats=True, momentum=0.1, eps=1e-5):
    # type: (Tensor, Optional[Tensor], Optional[Tensor], Optional[Tensor], Optional[Tensor], bool, float, float) -> Tensor  # noqa
    r"""Normalizes each channel of each sample in the batch independently
    (Instance Normalization).

    See :class:`~torch.nn.InstanceNorm1d`, :class:`~torch.nn.InstanceNorm2d`,
    :class:`~torch.nn.InstanceNorm3d` for details.
    """
    # Note the native op takes weight/bias before the running statistics.
    cudnn_enabled = torch.backends.cudnn.enabled
    return torch.instance_norm(input, weight, bias, running_mean, running_var,
                               use_input_stats, momentum, eps, cudnn_enabled)
1689 
1690 
@weak_script
def layer_norm(input, normalized_shape, weight=None, bias=None, eps=1e-5):
    # type: (Tensor, List[int], Optional[Tensor], Optional[Tensor], float) -> Tensor
    r"""Applies Layer Normalization over the trailing dimensions described by
    :attr:`normalized_shape`.

    See :class:`~torch.nn.LayerNorm` for details.
    """
    cudnn_enabled = torch.backends.cudnn.enabled
    return torch.layer_norm(input, normalized_shape, weight, bias, eps, cudnn_enabled)
1700 
1701 
@weak_script
def group_norm(input, num_groups, weight=None, bias=None, eps=1e-5):
    # type: (Tensor, int, Optional[Tensor], Optional[Tensor], float) -> Tensor
    r"""Applies Group Normalization to :attr:`input` using
    :attr:`num_groups` groups.

    See :class:`~torch.nn.GroupNorm` for details.
    """
    cudnn_enabled = torch.backends.cudnn.enabled
    return torch.group_norm(input, num_groups, weight, bias, eps, cudnn_enabled)
1711 
1712 
@weak_script
def local_response_norm(input, size, alpha=1e-4, beta=0.75, k=1.):
    # type: (Tensor, int, float, float, float) -> Tensor
    r"""Applies local response normalization over an input signal composed of
    several input planes, where channels occupy the second dimension.
    Applies normalization across channels.

    See :class:`~torch.nn.LocalResponseNorm` for details.

    Args:
        input: tensor with at least 3 dimensions; channels are dimension 1
        size: number of neighbouring channels whose squares are averaged
        alpha: multiplier applied to the averaged squares. Default: 1e-4
        beta: exponent applied to the divisor. Default: 0.75
        k: additive constant inside the divisor. Default: 1

    Returns:
        ``input / (k + alpha * avg) ** beta`` where ``avg`` is the average of
        the squared input over a window of :attr:`size` channels.

    Raises:
        ValueError: if :attr:`input` has fewer than 3 dimensions.
    """
    dim = input.dim()
    if dim < 3:
        # Keep the message in a single literal: a backslash continuation
        # inside the string would embed the next line's indentation in it.
        raise ValueError('Expected 3D or higher dimensionality input (got {} dimensions)'.format(dim))
    # Squared activations with a dummy dimension inserted before the channel
    # dimension, so the pooling window can slide along the channels.
    div = input.mul(input).unsqueeze(1)
    if dim == 3:
        # Pad only the channel dimension: size // 2 in front, (size - 1) // 2
        # behind, so the pooled output keeps the original channel count.
        div = pad(div, (0, 0, size // 2, (size - 1) // 2))
        div = avg_pool2d(div, (size, 1), stride=1).squeeze(1)
    else:
        sizes = input.size()
        # Collapse everything after the first spatial dimension so that a 3D
        # pooling op handles inputs of any higher dimensionality.
        div = div.view(sizes[0], 1, sizes[1], sizes[2], -1)
        div = pad(div, (0, 0, 0, 0, size // 2, (size - 1) // 2))
        div = avg_pool3d(div, (size, 1, 1), stride=1).squeeze(1)
        div = div.view(sizes)
    div = div.mul(alpha).add(k).pow(beta)
    return input / div
1738 
1739 
1740 # loss
1741 
@weak_script
def ctc_loss(log_probs, targets, input_lengths, target_lengths, blank=0,
             reduction='mean', zero_infinity=False):
    # type: (Tensor, Tensor, Tensor, Tensor, int, str, bool) -> Tensor
    r"""The Connectionist Temporal Classification loss.

    See :class:`~torch.nn.CTCLoss` for details.

    .. include:: cudnn_deterministic.rst
    .. include:: cuda_deterministic_backward.rst

    Args:
        log_probs: :math:`(T, N, C)` where `C = number of characters in alphabet including blank`,
            `T = input length`, and `N = batch size`.
            The logarithmized probabilities of the outputs
            (e.g. obtained with :func:`torch.nn.functional.log_softmax`).
        targets: :math:`(N, S)` or `(sum(target_lengths))`.
            Targets cannot be blank. In the second form, the targets are
            assumed to be concatenated.
        input_lengths: :math:`(N)`.
            Lengths of the inputs (must each be :math:`\leq T`)
        target_lengths: :math:`(N)`.
            Lengths of the targets
        blank (int, optional):
            Blank label. Default :math:`0`.
        reduction (string, optional): Specifies the reduction to apply to the output:
            ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied,
            ``'mean'``: the output losses will be divided by the target lengths and
            then the mean over the batch is taken, ``'sum'``: the output will be
            summed. Default: ``'mean'``
        zero_infinity (bool, optional):
            Whether to zero infinite losses and the associated gradients.
            Default: ``False``
            Infinite losses mainly occur when the inputs are too short
            to be aligned to the targets.

    Example::

        >>> log_probs = torch.randn(50, 16, 20).log_softmax(2).detach().requires_grad_()
        >>> targets = torch.randint(1, 20, (16, 30), dtype=torch.long)
        >>> input_lengths = torch.full((16,), 50, dtype=torch.long)
        >>> target_lengths = torch.randint(10,30,(16,), dtype=torch.long)
        >>> loss = F.ctc_loss(log_probs, targets, input_lengths, target_lengths)
        >>> loss.backward()
    """
    reduction_enum = _Reduction.get_enum(reduction)
    return torch.ctc_loss(log_probs, targets, input_lengths, target_lengths,
                          blank, reduction_enum, zero_infinity)
1788 
1789 
@weak_script
def nll_loss(input, target, weight=None, size_average=None, ignore_index=-100,
             reduce=None, reduction='mean'):
    # type: (Tensor, Tensor, Optional[Tensor], Optional[bool], int, Optional[bool], str) -> Tensor
    r"""The negative log likelihood loss.

    See :class:`~torch.nn.NLLLoss` for details.

    Args:
        input: :math:`(N, C)` where `C = number of classes` or :math:`(N, C, H, W)`
            in case of 2D Loss, or :math:`(N, C, d_1, d_2, ..., d_K)` where :math:`K \geq 1`
            in the case of K-dimensional loss.
        target: :math:`(N)` where each value is :math:`0 \leq \text{targets}[i] \leq C-1`,
            or :math:`(N, d_1, d_2, ..., d_K)` where :math:`K \geq 1` for
            K-dimensional loss.
        weight (Tensor, optional): a manual rescaling weight given to each
            class. If given, has to be a Tensor of size `C`
        size_average (bool, optional): Deprecated (see :attr:`reduction`).
            When ``False`` the losses are summed per minibatch instead of
            averaged. Ignored when :attr:`reduce` is ``False``. Default: ``True``
        ignore_index (int, optional): Specifies a target value that is ignored
            and does not contribute to the input gradient. When :attr:`size_average` is
            ``True``, the loss is averaged over non-ignored targets. Default: -100
        reduce (bool, optional): Deprecated (see :attr:`reduction`). When
            ``False``, a loss per batch element is returned and
            :attr:`size_average` is ignored. Default: ``True``
        reduction (string, optional): Specifies the reduction to apply to the output:
            ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied,
            ``'mean'``: the sum of the output will be divided by the number of
            elements in the output, ``'sum'``: the output will be summed.
            Specifying :attr:`size_average` or :attr:`reduce` overrides
            :attr:`reduction`. Default: ``'mean'``

    Raises:
        ValueError: if :attr:`input` has fewer than 2 dimensions, if the batch
            sizes of :attr:`input` and :attr:`target` differ, or (for 3D and
            5D+ input) if the trailing target dimensions do not match.

    Example::

        >>> # input is of size N x C = 3 x 5
        >>> input = torch.randn(3, 5, requires_grad=True)
        >>> # each element in target has to have 0 <= value < C
        >>> target = torch.tensor([1, 0, 4])
        >>> output = F.nll_loss(F.log_softmax(input, dim=1), target)
        >>> output.backward()
    """
    if size_average is not None or reduce is not None:
        reduction = _Reduction.legacy_get_string(size_average, reduce)

    ndim = input.dim()
    if ndim < 2:
        raise ValueError('Expected 2 or more dimensions (got {})'.format(ndim))
    if input.size(0) != target.size(0):
        raise ValueError('Expected input batch_size ({}) to match target batch_size ({}).'
                         .format(input.size(0), target.size(0)))

    if ndim == 2:
        ret = torch._C._nn.nll_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
    elif ndim == 4:
        ret = torch._C._nn.nll_loss2d(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
    else:
        # 3D or 5D+ input: flatten every dimension after the class dimension
        # so the 2D kernel can be reused, then restore the shape if needed.
        batch = input.size(0)
        classes = input.size(1)
        out_size = (batch,) + input.size()[2:]
        if target.size()[1:] != input.size()[2:]:
            raise ValueError('Expected target size {}, got {}'.format(
                out_size, target.size()))
        input = input.contiguous().view(batch, classes, 1, -1)
        target = target.contiguous().view(batch, 1, -1)
        ret = torch._C._nn.nll_loss2d(
            input, target, weight, _Reduction.get_enum(reduction), ignore_index)
        if reduction == 'none':
            # Only the unreduced result is reshaped back to the target layout.
            ret = ret.view(out_size)
    return ret
1867 
1868 
@weak_script
def poisson_nll_loss(input, target, log_input=True, full=False, size_average=None, eps=1e-8,
                     reduce=None, reduction='mean'):
    # type: (Tensor, Tensor, bool, bool, Optional[bool], float, Optional[bool], str) -> Tensor
    r"""Poisson negative log likelihood loss.

    See :class:`~torch.nn.PoissonNLLLoss` for details.

    Args:
        input: expectation of underlying Poisson distribution.
        target: random sample :math:`target \sim \text{Poisson}(input)`.
        log_input: if ``True`` the loss is computed as
            :math:`\exp(\text{input}) - \text{target} * \text{input}`, if ``False`` then loss is
            :math:`\text{input} - \text{target} * \log(\text{input}+\text{eps})`. Default: ``True``
        full: whether to compute full loss, i. e. to add the Stirling
            approximation term
            :math:`\text{target} * \log(\text{target}) - \text{target} + 0.5 * \log(2 * \pi * \text{target})`.
            The term is only added where ``target > 1``. Default: ``False``
        size_average (bool, optional): Deprecated (see :attr:`reduction`).
            When ``False`` the losses are summed per minibatch instead of
            averaged. Ignored when :attr:`reduce` is ``False``. Default: ``True``
        eps (float, optional): Small value to avoid evaluation of :math:`\log(0)` when
            :attr:`log_input` is ``False``. Default: 1e-8
        reduce (bool, optional): Deprecated (see :attr:`reduction`). When
            ``False``, a loss per batch element is returned and
            :attr:`size_average` is ignored. Default: ``True``
        reduction (string, optional): Specifies the reduction to apply to the output:
            ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied,
            ``'mean'``: the sum of the output will be divided by the number of
            elements in the output, ``'sum'``: the output will be summed.
            Specifying :attr:`size_average` or :attr:`reduce` overrides
            :attr:`reduction`. Default: ``'mean'``

    Raises:
        ValueError: if the resolved reduction is not one of the values above.
    """
    if size_average is not None or reduce is not None:
        reduction = _Reduction.legacy_get_string(size_average, reduce)

    if log_input:
        loss = torch.exp(input) - target * input
    else:
        loss = input - target * torch.log(input + eps)

    if full:
        stirling = target * torch.log(target) - target + 0.5 * torch.log(2 * math.pi * target)
        above_one = target > 1
        loss[above_one] += stirling[above_one]

    if reduction == 'mean':
        ret = torch.mean(loss)
    elif reduction == 'sum':
        ret = torch.sum(loss)
    elif reduction == 'none':
        ret = loss
    else:
        # Dummy assignment so ``ret`` is defined on every path for JIT script.
        ret = input
        raise ValueError(reduction + " is not valid")
    return ret
1924 
1925 
@weak_script
def kl_div(input, target, size_average=None, reduce=None, reduction='mean'):
    # type: (Tensor, Tensor, Optional[bool], Optional[bool], str) -> Tensor
    r"""The `Kullback-Leibler divergence`_ Loss.

    See :class:`~torch.nn.KLDivLoss` for details.

    Args:
        input: Tensor of arbitrary shape
        target: Tensor of the same shape as input
        size_average (bool, optional): Deprecated (see :attr:`reduction`).
            When ``False`` the losses are summed per minibatch instead of
            averaged. Ignored when :attr:`reduce` is ``False``. Default: ``True``
        reduce (bool, optional): Deprecated (see :attr:`reduction`). When
            ``False``, a loss per batch element is returned and
            :attr:`size_average` is ignored. Default: ``True``
        reduction (string, optional): Specifies the reduction to apply to the output:
            ``'none'`` | ``'batchmean'`` | ``'sum'`` | ``'mean'``.
            ``'none'``: no reduction will be applied
            ``'batchmean'``: the sum of the output will be divided by the batchsize
            ``'sum'``: the output will be summed
            ``'mean'``: the output will be divided by the number of elements in the output
            Default: ``'mean'``

    .. note::
        :attr:`size_average` and :attr:`reduce` are in the process of being deprecated,
        and in the meantime, specifying either of those two args will override
        :attr:`reduction` (including ``'batchmean'``).

    .. note::
        :attr:`reduction` = ``'mean'`` doesn't return the true kl divergence value, please use
        :attr:`reduction` = ``'batchmean'`` which aligns with KL math definition.
        In the next major release, ``'mean'`` will be changed to be the same as ``'batchmean'``.
    """
    # Tracks whether the 'batchmean' post-division applies.  It is only set
    # on the non-legacy path: when size_average/reduce are given they replace
    # ``reduction`` entirely, so no extra division must happen afterwards.
    batchmean = False
    if size_average is not None or reduce is not None:
        reduction_enum = _Reduction.legacy_get_enum(size_average, reduce)
    else:
        if reduction == 'mean':
            warnings.warn("reduction: 'mean' divides the total loss by both the batch size and the support size. "
                          "'batchmean' divides only by the batch size, and aligns with the KL div math definition. "
                          "'mean' will be changed to behave the same as 'batchmean' in the next major release.")

        if reduction == 'batchmean':
            # The native op has no 'batchmean': sum first, divide below.
            batchmean = True
            reduction_enum = _Reduction.get_enum('sum')
        else:
            reduction_enum = _Reduction.get_enum(reduction)

    reduced = torch.kl_div(input, target, reduction_enum)

    # A 0-dim input has no batch dimension to divide by.
    if batchmean and input.dim() != 0:
        reduced = reduced / input.size()[0]

    return reduced
1982 
1983 
@weak_script
def cross_entropy(input, target, weight=None, size_average=None, ignore_index=-100,
                  reduce=None, reduction='mean'):
    # type: (Tensor, Tensor, Optional[Tensor], Optional[bool], int, Optional[bool], str) -> Tensor
    r"""This criterion combines `log_softmax` and `nll_loss` in a single
    function: `log_softmax` is taken over dimension 1 of :attr:`input` and the
    result is passed to `nll_loss`.

    See :class:`~torch.nn.CrossEntropyLoss` for details.

    Args:
        input (Tensor) : :math:`(N, C)` where `C = number of classes` or :math:`(N, C, H, W)`
            in case of 2D Loss, or :math:`(N, C, d_1, d_2, ..., d_K)` where :math:`K \geq 1`
            in the case of K-dimensional loss.
        target (Tensor) : :math:`(N)` where each value is :math:`0 \leq \text{targets}[i] \leq C-1`,
            or :math:`(N, d_1, d_2, ..., d_K)` where :math:`K \geq 1` for
            K-dimensional loss.
        weight (Tensor, optional): a manual rescaling weight given to each
            class. If given, has to be a Tensor of size `C`
        size_average (bool, optional): Deprecated (see :attr:`reduction`).
            When ``False`` the losses are summed per minibatch instead of
            averaged. Ignored when :attr:`reduce` is ``False``. Default: ``True``
        ignore_index (int, optional): Specifies a target value that is ignored
            and does not contribute to the input gradient. When :attr:`size_average` is
            ``True``, the loss is averaged over non-ignored targets. Default: -100
        reduce (bool, optional): Deprecated (see :attr:`reduction`). When
            ``False``, a loss per batch element is returned and
            :attr:`size_average` is ignored. Default: ``True``
        reduction (string, optional): Specifies the reduction to apply to the output:
            ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied,
            ``'mean'``: the sum of the output will be divided by the number of
            elements in the output, ``'sum'``: the output will be summed.
            Specifying :attr:`size_average` or :attr:`reduce` overrides
            :attr:`reduction`. Default: ``'mean'``

    Examples::

        >>> input = torch.randn(3, 5, requires_grad=True)
        >>> target = torch.randint(5, (3,), dtype=torch.int64)
        >>> loss = F.cross_entropy(input, target)
        >>> loss.backward()
    """
    if size_average is not None or reduce is not None:
        reduction = _Reduction.legacy_get_string(size_average, reduce)
    # The legacy flags are already folded into ``reduction``, so they are
    # passed on as None.
    log_probs = log_softmax(input, 1)
    return nll_loss(log_probs, target, weight, None, ignore_index, None, reduction)
2031 
2032 
@weak_script
def binary_cross_entropy(input, target, weight=None, size_average=None,
                         reduce=None, reduction='mean'):
    # type: (Tensor, Tensor, Optional[Tensor], Optional[bool], Optional[bool], str) -> Tensor
    r"""Function that measures the Binary Cross Entropy
    between the target and the output.

    See :class:`~torch.nn.BCELoss` for details.

    Args:
        input: Tensor of arbitrary shape
        target: Tensor of the same shape as input
        weight (Tensor, optional): a manual rescaling weight
            if provided it's expanded to match the target tensor shape
        size_average (bool, optional): Deprecated (see :attr:`reduction`).
            When ``False`` the losses are summed per minibatch instead of
            averaged. Ignored when :attr:`reduce` is ``False``. Default: ``True``
        reduce (bool, optional): Deprecated (see :attr:`reduction`). When
            ``False``, a loss per batch element is returned and
            :attr:`size_average` is ignored. Default: ``True``
        reduction (string, optional): Specifies the reduction to apply to the output:
            ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied,
            ``'mean'``: the sum of the output will be divided by the number of
            elements in the output, ``'sum'``: the output will be summed.
            Specifying :attr:`size_average` or :attr:`reduce` overrides
            :attr:`reduction`. Default: ``'mean'``

    Raises:
        ValueError: if :attr:`input` and :attr:`target` hold a different
            number of elements.

    Examples::

        >>> input = torch.randn((3, 2), requires_grad=True)
        >>> target = torch.rand((3, 2), requires_grad=False)
        >>> loss = F.binary_cross_entropy(torch.sigmoid(input), target)
        >>> loss.backward()
    """
    if size_average is None and reduce is None:
        reduction_enum = _Reduction.get_enum(reduction)
    else:
        reduction_enum = _Reduction.legacy_get_enum(size_average, reduce)

    # Differing shapes are only warned about; differing element counts are
    # a hard error.
    if not (target.size() == input.size()):
        warnings.warn("Using a target size ({}) that is different to the input size ({}) is deprecated. "
                      "Please ensure they have the same size.".format(target.size(), input.size()))
    if target.numel() != input.numel():
        raise ValueError("Target and input must have the same number of elements. target nelement ({}) "
                         "!= input nelement ({})".format(target.numel(), input.numel()))

    if weight is not None:
        expanded_size = _infer_size(target.size(), weight.size())
        weight = weight.expand(expanded_size)

    return torch._C._nn.binary_cross_entropy(input, target, weight, reduction_enum)
2087 
2088 
@weak_script
def binary_cross_entropy_with_logits(input, target, weight=None, size_average=None,
                                     reduce=None, reduction='mean', pos_weight=None):
    # type: (Tensor, Tensor, Optional[Tensor], Optional[bool], Optional[bool], str, Optional[Tensor]) -> Tensor
    r"""Function that measures Binary Cross Entropy between target and output
    logits.

    See :class:`~torch.nn.BCEWithLogitsLoss` for details.

    Args:
        input: Tensor of arbitrary shape
        target: Tensor of the same shape as input
        weight (Tensor, optional): a manual rescaling weight
            if provided it's repeated to match input tensor shape
        size_average (bool, optional): Deprecated (see :attr:`reduction`).
            When ``False`` the losses are summed per minibatch instead of
            averaged. Ignored when :attr:`reduce` is ``False``. Default: ``True``
        reduce (bool, optional): Deprecated (see :attr:`reduction`). When
            ``False``, a loss per batch element is returned and
            :attr:`size_average` is ignored. Default: ``True``
        reduction (string, optional): Specifies the reduction to apply to the output:
            ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied,
            ``'mean'``: the sum of the output will be divided by the number of
            elements in the output, ``'sum'``: the output will be summed.
            Specifying :attr:`size_average` or :attr:`reduce` overrides
            :attr:`reduction`. Default: ``'mean'``
        pos_weight (Tensor, optional): a weight of positive examples.
            Must be a vector with length equal to the number of classes.

    Raises:
        ValueError: if :attr:`target` and :attr:`input` differ in size.

    Examples::

         >>> input = torch.randn(3, requires_grad=True)
         >>> target = torch.empty(3).random_(2)
         >>> loss = F.binary_cross_entropy_with_logits(input, target)
         >>> loss.backward()
    """
    if size_average is None and reduce is None:
        reduction_enum = _Reduction.get_enum(reduction)
    else:
        reduction_enum = _Reduction.legacy_get_enum(size_average, reduce)

    same_size = target.size() == input.size()
    if not same_size:
        raise ValueError("Target size ({}) must be the same as input size ({})".format(target.size(), input.size()))

    return torch.binary_cross_entropy_with_logits(input, target, weight, pos_weight, reduction_enum)
2137 
2138 
def _pointwise_loss(lambd, lambd_optimized, input, target, reduction='mean'):
    """Evaluates an element-wise loss and applies :attr:`reduction`.

    Args:
        lambd: callable ``(input, target) -> Tensor`` returning the unreduced
            loss; used when :attr:`target` requires grad.
        lambd_optimized: callable ``(input, target, reduction_enum)`` used
            otherwise; it receives the broadcast tensors and the integer
            reduction code from ``_Reduction.get_enum``.
        input: input tensor.
        target: target tensor.
        reduction: reduction name understood by ``_Reduction.get_enum``.

    Returns:
        The loss tensor, reduced according to :attr:`reduction`.
    """
    # Resolve the reduction once, through the same helper, for both paths.
    # Previously the requires_grad path treated every string other than
    # 'none'/'mean' as 'sum', so it could disagree with the other path.
    reduction_enum = _Reduction.get_enum(reduction)
    if target.requires_grad:
        d = lambd(input, target)
        if reduction_enum == _Reduction.get_enum('none'):
            return d
        if reduction_enum == _Reduction.get_enum('mean'):
            return torch.mean(d)
        return torch.sum(d)
    expanded_input, expanded_target = torch.broadcast_tensors(input, target)
    return lambd_optimized(expanded_input, expanded_target, reduction_enum)
2148 
2149 
@weak_script
def _smooth_l1_loss(input, target):
    # type: (Tensor, Tensor) -> Tensor
    r"""Unreduced smooth L1 loss: ``0.5 * t ** 2`` where ``t < 1`` and
    ``t - 0.5`` elsewhere, with ``t = |input - target|``.
    """
    abs_err = torch.abs(input - target)
    quadratic = 0.5 * abs_err ** 2
    linear = abs_err - 0.5
    return torch.where(abs_err < 1, quadratic, linear)
2155 
2156 
@weak_script
def smooth_l1_loss(input, target, size_average=None, reduce=None, reduction='mean'):
    # type: (Tensor, Tensor, Optional[bool], Optional[bool], str) -> Tensor
    r"""Function that uses a squared term if the absolute
    element-wise error falls below 1 and an L1 term otherwise.

    See :class:`~torch.nn.SmoothL1Loss` for details.

    Args:
        input: input tensor
        target: target tensor
        size_average (bool, optional): Deprecated (see :attr:`reduction`).
        reduce (bool, optional): Deprecated (see :attr:`reduction`).
        reduction (string, optional): ``'none'`` | ``'mean'`` | ``'sum'``.
            Specifying :attr:`size_average` or :attr:`reduce` overrides
            :attr:`reduction`. Default: ``'mean'``
    """
    if size_average is not None or reduce is not None:
        reduction = _Reduction.legacy_get_string(size_average, reduce)
    # Resolve the reduction once so both branches interpret it identically.
    # Previously the requires_grad branch treated every string other than
    # 'none'/'mean' as 'sum' without consulting _Reduction.get_enum.
    reduction_enum = _Reduction.get_enum(reduction)
    if target.requires_grad:
        # The native kernel is not used when target needs a gradient;
        # compose the loss from differentiable ops instead.
        ret = _smooth_l1_loss(input, target)
        if reduction_enum == _Reduction.get_enum('mean'):
            ret = torch.mean(ret)
        elif reduction_enum == _Reduction.get_enum('sum'):
            ret = torch.sum(ret)
    else:
        expanded_input, expanded_target = torch.broadcast_tensors(input, target)
        ret = torch._C._nn.smooth_l1_loss(expanded_input, expanded_target, reduction_enum)
    return ret
2175 
2176 
@weak_script
def l1_loss(input, target, size_average=None, reduce=None, reduction='mean'):
    # type: (Tensor, Tensor, Optional[bool], Optional[bool], str) -> Tensor
    r"""l1_loss(input, target, size_average=None, reduce=None, reduction='mean') -> Tensor

    Function that takes the mean element-wise absolute value difference.

    See :class:`~torch.nn.L1Loss` for details.

    Args:
        input: input tensor
        target: target tensor
        size_average (bool, optional): Deprecated (see :attr:`reduction`).
        reduce (bool, optional): Deprecated (see :attr:`reduction`).
        reduction (string, optional): ``'none'`` | ``'mean'`` | ``'sum'``.
            Specifying :attr:`size_average` or :attr:`reduce` overrides
            :attr:`reduction`. Default: ``'mean'``
    """
    if size_average is not None or reduce is not None:
        reduction = _Reduction.legacy_get_string(size_average, reduce)
    # Resolve the reduction once so both branches interpret it identically.
    # Previously the requires_grad branch treated every string other than
    # 'none'/'mean' as 'sum' without consulting _Reduction.get_enum.
    reduction_enum = _Reduction.get_enum(reduction)
    if target.requires_grad:
        # The native kernel is not used when target needs a gradient;
        # compose the loss from differentiable ops instead.
        ret = torch.abs(input - target)
        if reduction_enum == _Reduction.get_enum('mean'):
            ret = torch.mean(ret)
        elif reduction_enum == _Reduction.get_enum('sum'):
            ret = torch.sum(ret)
    else:
        expanded_input, expanded_target = torch.broadcast_tensors(input, target)
        ret = torch._C._nn.l1_loss(expanded_input, expanded_target, reduction_enum)
    return ret
2196 
2197 
@weak_script
def mse_loss(input, target, size_average=None, reduce=None, reduction='mean'):
    # type: (Tensor, Tensor, Optional[bool], Optional[bool], str) -> Tensor
    r"""mse_loss(input, target, size_average=None, reduce=None, reduction='mean') -> Tensor

    Measures the element-wise mean squared error.

    See :class:`~torch.nn.MSELoss` for details.

    Args:
        input: input tensor
        target: target tensor
        size_average (bool, optional): Deprecated (see :attr:`reduction`).
        reduce (bool, optional): Deprecated (see :attr:`reduction`).
        reduction (string, optional): ``'none'`` | ``'mean'`` | ``'sum'``.
            Specifying :attr:`size_average` or :attr:`reduce` overrides
            :attr:`reduction`. Default: ``'mean'``
    """
    if size_average is not None or reduce is not None:
        reduction = _Reduction.legacy_get_string(size_average, reduce)
    # Resolve the reduction once so both branches interpret it identically.
    # Previously the requires_grad branch treated every string other than
    # 'none'/'mean' as 'sum' without consulting _Reduction.get_enum.
    reduction_enum = _Reduction.get_enum(reduction)
    if target.requires_grad:
        # The native kernel is not used when target needs a gradient;
        # compose the loss from differentiable ops instead.
        ret = (input - target) ** 2
        if reduction_enum == _Reduction.get_enum('mean'):
            ret = torch.mean(ret)
        elif reduction_enum == _Reduction.get_enum('sum'):
            ret = torch.sum(ret)
    else:
        expanded_input, expanded_target = torch.broadcast_tensors(input, target)
        ret = torch._C._nn.mse_loss(expanded_input, expanded_target, reduction_enum)
    return ret
2217 
2218 
@weak_script
def margin_ranking_loss(input1, input2, target, margin=0, size_average=None,
                        reduce=None, reduction='mean'):
    # type: (Tensor, Tensor, Tensor, float, Optional[bool], Optional[bool], str) -> Tensor
    r"""margin_ranking_loss(input1, input2, target, margin=0, size_average=None, reduce=None, reduction='mean') -> Tensor

    See :class:`~torch.nn.MarginRankingLoss` for details.

    Raises:
        RuntimeError: if any of :attr:`input1`, :attr:`input2` or
            :attr:`target` is a 0-dimensional tensor.
    """  # noqa
    if size_average is None and reduce is None:
        reduction_enum = _Reduction.get_enum(reduction)
    else:
        reduction_enum = _Reduction.legacy_get_enum(size_average, reduce)

    has_scalar = input1.dim() == 0 or input2.dim() == 0 or target.dim() == 0
    if has_scalar:
        raise RuntimeError(("margin_ranking_loss does not support scalars, got sizes: "
                            "input1: {}, input2: {}, target: {} ".format(input1.size(), input2.size(), target.size())))
    return torch.margin_ranking_loss(input1, input2, target, margin, reduction_enum)
2235 
2236 
@weak_script
def hinge_embedding_loss(input, target, margin=1.0, size_average=None,
                         reduce=None, reduction='mean'):
    # type: (Tensor, Tensor, float, Optional[bool], Optional[bool], str) -> Tensor
    r"""hinge_embedding_loss(input, target, margin=1.0, size_average=None, reduce=None, reduction='mean') -> Tensor

    See :class:`~torch.nn.HingeEmbeddingLoss` for details.
    """  # noqa
    # The deprecated flags, when either is given, decide the reduction.
    if size_average is None and reduce is None:
        reduction_enum = _Reduction.get_enum(reduction)
    else:
        reduction_enum = _Reduction.legacy_get_enum(size_average, reduce)
    return torch.hinge_embedding_loss(input, target, margin, reduction_enum)
2250 
2251 
@weak_script
def multilabel_margin_loss(input, target, size_average=None, reduce=None, reduction='mean'):
    # type: (Tensor, Tensor, Optional[bool], Optional[bool], str) -> Tensor
    r"""multilabel_margin_loss(input, target, size_average=None, reduce=None, reduction='mean') -> Tensor

    See :class:`~torch.nn.MultiLabelMarginLoss` for details.
    """
    # The deprecated flags, when either is given, decide the reduction.
    if size_average is None and reduce is None:
        reduction_enum = _Reduction.get_enum(reduction)
    else:
        reduction_enum = _Reduction.legacy_get_enum(size_average, reduce)
    return torch._C._nn.multilabel_margin_loss(input, target, reduction_enum)
2264 
2265 
@weak_script
def soft_margin_loss(input, target, size_average=None, reduce=None, reduction='mean'):
    # type: (Tensor, Tensor, Optional[bool], Optional[bool], str) -> Tensor
    r"""soft_margin_loss(input, target, size_average=None, reduce=None, reduction='mean') -> Tensor

    See :class:`~torch.nn.SoftMarginLoss` for details.
    """
    # Legacy ``size_average`` / ``reduce`` win over ``reduction`` when given.
    if size_average is None and reduce is None:
        reduction_code = _Reduction.get_enum(reduction)
    else:
        reduction_code = _Reduction.legacy_get_enum(size_average, reduce)
    return torch._C._nn.soft_margin_loss(input, target, reduction_code)
2278 
2279 
@weak_script
def multilabel_soft_margin_loss(input, target, weight=None, size_average=None,
                                reduce=None, reduction='mean'):
    # type: (Tensor, Tensor, Optional[Tensor], Optional[bool], Optional[bool], str) -> Tensor
    r"""multilabel_soft_margin_loss(input, target, weight=None, size_average=None, reduce=None, reduction='mean') -> Tensor

    See :class:`~torch.nn.MultiLabelSoftMarginLoss` for details.

    The per-element loss is averaged over dimension 1, giving one value per
    sample, and those values are then combined according to :attr:`reduction`
    (``'none'`` | ``'mean'`` | ``'sum'``). Any other reduction string raises
    :class:`ValueError`.
    """  # noqa
    # Legacy ``size_average`` / ``reduce`` override ``reduction`` when given.
    if size_average is not None or reduce is not None:
        reduction = _Reduction.legacy_get_string(size_average, reduce)

    loss = -(target * logsigmoid(input) + (1 - target) * logsigmoid(-input))

    if weight is not None:
        loss = loss * weight

    loss = loss.sum(dim=1) / input.size(1)  # only return N loss values

    if reduction == 'none':
        ret = loss
    elif reduction == 'mean':
        ret = loss.mean()
    elif reduction == 'sum':
        ret = loss.sum()
    else:
        # Placeholder assignment keeps ``ret`` defined on every branch for the
        # script compiler; it is never returned because of the raise below.
        ret = input
        raise ValueError(reduction + " is not valid")
    return ret
2308 
2309 
@weak_script
def cosine_embedding_loss(input1, input2, target, margin=0, size_average=None,
                          reduce=None, reduction='mean'):
    # type: (Tensor, Tensor, Tensor, float, Optional[bool], Optional[bool], str) -> Tensor
    r"""cosine_embedding_loss(input1, input2, target, margin=0, size_average=None, reduce=None, reduction='mean') -> Tensor

    See :class:`~torch.nn.CosineEmbeddingLoss` for details.
    """  # noqa
    # Legacy ``size_average`` / ``reduce`` win over ``reduction`` when given.
    if size_average is None and reduce is None:
        reduction_code = _Reduction.get_enum(reduction)
    else:
        reduction_code = _Reduction.legacy_get_enum(size_average, reduce)
    return torch.cosine_embedding_loss(input1, input2, target, margin, reduction_code)
2323 
2324 
@weak_script
def multi_margin_loss(input, target, p=1, margin=1., weight=None, size_average=None,
                      reduce=None, reduction='mean'):
    # type: (Tensor, Tensor, int, float, Optional[Tensor], Optional[bool], Optional[bool], str) -> Tensor
    r"""multi_margin_loss(input, target, p=1, margin=1, weight=None, size_average=None,
                          reduce=None, reduction='mean') -> Tensor

    See :class:`~torch.nn.MultiMarginLoss` for details.

    Raises :class:`ValueError` when :attr:`p` is neither 1 nor 2, or when
    :attr:`weight` is given and is not one-dimensional.
    """
    # Legacy ``size_average`` / ``reduce`` win over ``reduction`` when given.
    if size_average is None and reduce is None:
        reduction_code = _Reduction.get_enum(reduction)
    else:
        reduction_code = _Reduction.legacy_get_enum(size_average, reduce)
    if not (p == 1 or p == 2):
        raise ValueError('only p == 1 and p == 2 supported')
    # Kept as a nested check so ``weight`` is only dereferenced once it is
    # known not to be None.
    if weight is not None:
        if weight.dim() != 1:
            raise ValueError('weight must be one-dimensional')

    return torch._C._nn.multi_margin_loss(input, target, p, margin, weight, reduction_code)
2345 
2346 
# Re-export ``torch.pixel_shuffle`` with its user-facing documentation attached.
pixel_shuffle = _add_docstr(torch.pixel_shuffle, r"""
pixel_shuffle(input, upscale_factor) -> Tensor

Rearranges elements in a tensor of shape :math:`(*, C \times r^2, H, W)` to a
tensor of shape :math:`(*, C, H \times r, W \times r)`.

See :class:`~torch.nn.PixelShuffle` for details.

Args:
    input (Tensor): the input tensor
    upscale_factor (int): factor to increase spatial resolution by

Examples::

    >>> input = torch.randn(1, 9, 4, 4)
    >>> output = torch.nn.functional.pixel_shuffle(input, 3)
    >>> print(output.size())
    torch.Size([1, 1, 12, 12])
""")
2364 
2365 
def upsample(input, size=None, scale_factor=None, mode='nearest', align_corners=None):
    r"""Upsamples the input to either the given :attr:`size` or the given
    :attr:`scale_factor`

    .. warning::
        This function is deprecated in favor of :func:`torch.nn.functional.interpolate`.
        This is equivalent with ``nn.functional.interpolate(...)``.

    .. include:: cuda_deterministic_backward.rst

    The algorithm used for upsampling is determined by :attr:`mode`.

    Currently temporal, spatial and volumetric upsampling are supported, i.e.
    expected inputs are 3-D, 4-D or 5-D in shape.

    The input dimensions are interpreted in the form:
    `mini-batch x channels x [optional depth] x [optional height] x width`.

    The modes available for upsampling are: `nearest`, `linear` (3D-only),
    `bilinear`, `bicubic` (4D-only), `trilinear` (5D-only)

    Args:
        input (Tensor): the input tensor
        size (int or Tuple[int] or Tuple[int, int] or Tuple[int, int, int]):
            output spatial size.
        scale_factor (float or Tuple[float]): multiplier for spatial size. Has to be an integer.
        mode (string): algorithm used for upsampling:
            ``'nearest'`` | ``'linear'`` | ``'bilinear'`` | ``'bicubic'`` |
            ``'trilinear'``. Default: ``'nearest'``
        align_corners (bool, optional): Geometrically, we consider the pixels of the
            input and output as squares rather than points.
            If set to ``True``, the input and output tensors are aligned by the
            center points of their corner pixels. If set to ``False``, the input and
            output tensors are aligned by the corner points of their corner
            pixels, and the interpolation uses edge value padding for out-of-boundary values.
            This only has effect when :attr:`mode` is ``'linear'``,
            ``'bilinear'``, ``'bicubic'`` or ``'trilinear'``.
            Default: ``False``

    .. warning::
        With ``align_corners = True``, the linearly interpolating modes
        (`linear`, `bilinear`, and `trilinear`) don't proportionally align the
        output and input pixels, and thus the output values can depend on the
        input size. This was the default behavior for these modes up to version
        0.3.1. Since then, the default behavior is ``align_corners = False``.
        See :class:`~torch.nn.Upsample` for concrete examples on how this
        affects the outputs.

    """
    # A plain UserWarning is emitted on every call before delegating.
    deprecation_msg = "nn.functional.upsample is deprecated. Use nn.functional.interpolate instead."
    warnings.warn(deprecation_msg)
    return interpolate(input, size=size, scale_factor=scale_factor, mode=mode,
                       align_corners=align_corners)
2417 
2418 
def interpolate(input, size=None, scale_factor=None, mode='nearest', align_corners=None):
    r"""Down/up samples the input to either the given :attr:`size` or the given
    :attr:`scale_factor`

    The algorithm used for interpolation is determined by :attr:`mode`.

    Currently temporal, spatial and volumetric sampling are supported, i.e.
    expected inputs are 3-D, 4-D or 5-D in shape.

    The input dimensions are interpreted in the form:
    `mini-batch x channels x [optional depth] x [optional height] x width`.

    The modes available for resizing are: `nearest`, `linear` (3D-only),
    `bilinear`, `bicubic` (4D-only), `trilinear` (5D-only), `area`

    Args:
        input (Tensor): the input tensor
        size (int or Tuple[int] or Tuple[int, int] or Tuple[int, int, int]):
            output spatial size.
        scale_factor (float or Tuple[float]): multiplier for spatial size. Has to match
            the number of spatial dimensions of the input if it is a tuple or a list.
        mode (str): algorithm used for upsampling:
            ``'nearest'`` | ``'linear'`` | ``'bilinear'`` | ``'bicubic'`` |
            ``'trilinear'`` | ``'area'``. Default: ``'nearest'``
        align_corners (bool, optional): Geometrically, we consider the pixels of the
            input and output as squares rather than points.
            If set to ``True``, the input and output tensors are aligned by the
            center points of their corner pixels. If set to ``False``, the input and
            output tensors are aligned by the corner points of their corner
            pixels, and the interpolation uses edge value padding for out-of-boundary values.
            This only has effect when :attr:`mode` is ``'linear'``,
            ``'bilinear'``, ``'bicubic'``, or ``'trilinear'``.
            Default: ``False``

    Raises:
        ValueError: if neither or both of :attr:`size` and :attr:`scale_factor`
            are given, if a tuple/list :attr:`scale_factor` has the wrong
            length, or if :attr:`align_corners` is set for ``'nearest'`` /
            ``'area'``.
        NotImplementedError: if :attr:`mode` does not support the
            dimensionality of :attr:`input`.

    .. warning::
        With ``align_corners = True``, the linearly interpolating modes
        (`linear`, `bilinear`, and `trilinear`) don't proportionally align the
        output and input pixels, and thus the output values can depend on the
        input size. This was the default behavior for these modes up to version
        0.3.1. Since then, the default behavior is ``align_corners = False``.
        See :class:`~torch.nn.Upsample` for concrete examples on how this
        affects the outputs.

    .. include:: cuda_deterministic_backward.rst
    """

    def _check_size_scale_factor(dim):
        # Exactly one of ``size`` / ``scale_factor`` must be provided.
        if size is None and scale_factor is None:
            raise ValueError('either size or scale_factor should be defined')
        if size is not None and scale_factor is not None:
            raise ValueError('only one of size or scale_factor should be defined')
        # Lists are checked as well as tuples: a wrongly sized list would
        # otherwise fail later with an IndexError or be silently truncated.
        if isinstance(scale_factor, (tuple, list)) and len(scale_factor) != dim:
            raise ValueError('scale_factor shape must match input shape. '
                             'Input is {}D, scale_factor size is {}'.format(dim, len(scale_factor)))

    def _output_size(dim):
        # Resolve the spatial output size for ``dim`` spatial dimensions.
        _check_size_scale_factor(dim)
        if size is not None:
            return size
        from .modules.utils import _ntuple
        scale_factors = _ntuple(dim)(scale_factor)
        # math.floor might return float in py2.7
        return [int(math.floor(input.size(i + 2) * scale_factors[i])) for i in range(dim)]

    if mode in ('nearest', 'area'):
        if align_corners is not None:
            raise ValueError("align_corners option can only be set with the "
                             "interpolating modes: linear | bilinear | bicubic | trilinear")
    elif align_corners is None:
        warnings.warn("Default upsampling behavior when mode={} is changed "
                      "to align_corners=False since 0.4.0. Please specify "
                      "align_corners=True if the old behavior is desired. "
                      "See the documentation of nn.Upsample for details.".format(mode))
        align_corners = False

    ndim = input.dim()
    if ndim in (3, 4, 5):
        spatial_dims = ndim - 2

        if mode == 'nearest':
            nearest_ops = (torch._C._nn.upsample_nearest1d,
                           torch._C._nn.upsample_nearest2d,
                           torch._C._nn.upsample_nearest3d)
            return nearest_ops[spatial_dims - 1](input, _output_size(spatial_dims))

        if mode == 'area':
            pool_ops = (adaptive_avg_pool1d, adaptive_avg_pool2d, adaptive_avg_pool3d)
            return pool_ops[spatial_dims - 1](input, _output_size(spatial_dims))

        # Each remaining mode is tied to exactly one input dimensionality.
        if mode == 'linear':
            needed_ndim = 3
        elif mode == 'bilinear' or mode == 'bicubic':
            needed_ndim = 4
        elif mode == 'trilinear':
            needed_ndim = 5
        else:
            needed_ndim = None

        if needed_ndim is not None:
            if ndim != needed_ndim:
                # Raised before size/scale_factor are inspected, as before.
                raise NotImplementedError("Got {}D input, but {} mode needs {}D input"
                                          .format(ndim, mode, needed_ndim))
            if mode == 'linear':
                return torch._C._nn.upsample_linear1d(input, _output_size(1), align_corners)
            if mode == 'bilinear':
                return torch._C._nn.upsample_bilinear2d(input, _output_size(2), align_corners)
            if mode == 'bicubic':
                return torch._C._nn.upsample_bicubic2d(input, _output_size(2), align_corners)
            return torch._C._nn.upsample_trilinear3d(input, _output_size(3), align_corners)

    raise NotImplementedError("Input Error: Only 3D, 4D and 5D input Tensors supported"
                              " (got {}D) for the modes: nearest | linear | bilinear | bicubic | trilinear | area"
                              " (got {})".format(input.dim(), mode))
2532 
2533 
def upsample_nearest(input, size=None, scale_factor=None):
    r"""Upsamples the input, using nearest neighbours' pixel values.

    .. warning::
        This function is deprecated in favor of :func:`torch.nn.functional.interpolate`.
        This is equivalent with ``nn.functional.interpolate(..., mode='nearest')``.

    Currently spatial and volumetric upsampling are supported (i.e. expected
    inputs are 4 or 5 dimensional).

    Args:
        input (Tensor): input
        size (int or Tuple[int, int] or Tuple[int, int, int]): output spatial
            size.
        scale_factor (int): multiplier for spatial size. Has to be an integer.

    .. include:: cuda_deterministic_backward.rst
    """
    # DeprecationWarning is ignored by default
    deprecation_msg = "nn.functional.upsample_nearest is deprecated. Use nn.functional.interpolate instead."
    warnings.warn(deprecation_msg)
    return interpolate(input, size=size, scale_factor=scale_factor, mode='nearest')
2555 
2556 
def upsample_bilinear(input, size=None, scale_factor=None):
    r"""Upsamples the input, using bilinear upsampling.

    .. warning::
        This function is deprecated in favor of :func:`torch.nn.functional.interpolate`.
        This is equivalent with
        ``nn.functional.interpolate(..., mode='bilinear', align_corners=True)``.

    Expected inputs are spatial (4 dimensional). Use `upsample_trilinear` for
    volumetric (5 dimensional) inputs.

    Args:
        input (Tensor): input
        size (int or Tuple[int, int]): output spatial size.
        scale_factor (int or Tuple[int, int]): multiplier for spatial size

    .. include:: cuda_deterministic_backward.rst
    """
    # DeprecationWarning is ignored by default
    deprecation_msg = "nn.functional.upsample_bilinear is deprecated. Use nn.functional.interpolate instead."
    warnings.warn(deprecation_msg)
    return interpolate(input, size=size, scale_factor=scale_factor,
                       mode='bilinear', align_corners=True)
2578 
2579 
# Integer codes for the interpolation modes accepted by ``grid_sample``.
# ``grid_sample`` hard-codes the same numbers rather than reading this table.
GRID_SAMPLE_INTERPOLATION_MODES = dict(bilinear=0, nearest=1)

# Integer codes for the padding modes accepted by ``grid_sample``.
GRID_SAMPLE_PADDING_MODES = dict(zeros=0, border=1, reflection=2)
2590 
2591 
@weak_script
def grid_sample(input, grid, mode='bilinear', padding_mode='zeros'):
    # type: (Tensor, Tensor, str, str) -> Tensor
    r"""Given an :attr:`input` and a flow-field :attr:`grid`, computes the
    ``output`` using :attr:`input` values and pixel locations from :attr:`grid`.

    Currently, only spatial (4-D) and volumetric (5-D) :attr:`input` are
    supported.

    In the spatial (4-D) case, for :attr:`input` with shape
    :math:`(N, C, H_\text{in}, W_\text{in})` and :attr:`grid` with shape
    :math:`(N, H_\text{out}, W_\text{out}, 2)`, the output will have shape
    :math:`(N, C, H_\text{out}, W_\text{out})`.

    For each output location ``output[n, :, h, w]``, the size-2 vector
    ``grid[n, h, w]`` specifies :attr:`input` pixel locations ``x`` and ``y``,
    which are used to interpolate the output value ``output[n, :, h, w]``.
    In the case of 5D inputs, ``grid[n, d, h, w]`` specifies the
    ``x``, ``y``, ``z`` pixel locations for interpolating
    ``output[n, :, d, h, w]``. :attr:`mode` argument specifies ``nearest`` or
    ``bilinear`` interpolation method to sample the input pixels.

    :attr:`grid` should have most values in the range of ``[-1, 1]``. This is
    because the pixel locations are normalized by the :attr:`input` spatial
    dimensions. For example, values ``x = -1, y = -1`` is the left-top pixel of
    :attr:`input`, and values  ``x = 1, y = 1`` is the right-bottom pixel of
    :attr:`input`.

    If :attr:`grid` has values outside the range of ``[-1, 1]``, those locations
    are handled as defined by :attr:`padding_mode`. Options are

        * ``padding_mode="zeros"``: use ``0`` for out-of-bound values,
        * ``padding_mode="border"``: use border values for out-of-bound values,
        * ``padding_mode="reflection"``: use values at locations reflected by
          the border for out-of-bound values. For location far away from the
          border, it will keep being reflected until becoming in bound, e.g.,
          (normalized) pixel location ``x = -3.5`` reflects by ``-1`` and
          becomes ``x' = 1.5``, then reflects by border ``1`` and becomes
          ``x'' = -0.5``.

    .. Note:: This function is often used in building Spatial Transformer Networks.
    .. include:: cuda_deterministic_backward.rst

    Args:
        input (Tensor): input of shape :math:`(N, C, H_\text{in}, W_\text{in})` (4-D case)
                        or :math:`(N, C, D_\text{in}, H_\text{in}, W_\text{in})` (5-D case)
        grid (Tensor): flow-field of shape :math:`(N, H_\text{out}, W_\text{out}, 2)` (4-D case)
                       or :math:`(N, D_\text{out}, H_\text{out}, W_\text{out}, 3)` (5-D case)
        mode (str): interpolation mode to calculate output values
            ``'bilinear'`` | ``'nearest'``. Default: ``'bilinear'``
        padding_mode (str): padding mode for outside grid values
            ``'zeros'`` | ``'border'`` | ``'reflection'``. Default: ``'zeros'``

    Returns:
        output (Tensor): output Tensor

    """
    # Validate both string options up front; ``mode`` is checked first.
    mode_is_known = mode == 'bilinear' or mode == 'nearest'
    if not mode_is_known:
        raise ValueError("nn.functional.grid_sample(): expected mode to be "
                         "'bilinear' or 'nearest', but got: '{}'".format(mode))
    padding_is_known = (padding_mode == 'zeros' or padding_mode == 'border'
                        or padding_mode == 'reflection')
    if not padding_is_known:
        raise ValueError("nn.functional.grid_sample(): expected padding_mode "
                         "to be 'zeros', 'border', or 'reflection', "
                         "but got: '{}'".format(padding_mode))

    # Integer codes passed to the sampler; the numbers match the
    # GRID_SAMPLE_INTERPOLATION_MODES / GRID_SAMPLE_PADDING_MODES tables.
    if mode == 'nearest':
        interpolation_code = 1
    else:
        interpolation_code = 0

    if padding_mode == 'reflection':
        padding_code = 2
    elif padding_mode == 'border':
        padding_code = 1
    else:
        padding_code = 0

    return torch.grid_sampler(input, grid, interpolation_code, padding_code)
2670 
2671 
@weak_script
def affine_grid(theta, size):
    # type: (Tensor, List[int]) -> Tensor
    r"""Generates a 2d flow field, given a batch of affine matrices :attr:`theta`.
    Generally used in conjunction with :func:`grid_sample` to
    implement Spatial Transformer Networks.

    Args:
        theta (Tensor): input batch of affine matrices (:math:`N \times 2 \times 3`)
        size (torch.Size): the target output image size (:math:`N \times C \times H \times W`).
            Example: torch.Size((32, 3, 24, 24))

    Returns:
        output (Tensor): output Tensor of size (:math:`N \times H \times W \times 2`)
    """
    # All of the work is delegated to the grid generator in ``vision``.
    flow_field = vision.affine_grid_generator(theta, size)
    return flow_field
2688 
2689 
@weak_script
def pad(input, pad, mode='constant', value=0):
    # type: (Tensor, List[int], str, float) -> Tensor
    r"""Pads tensor.

    Padding size:
        The padding size by which to pad some dimensions of :attr:`input`
        are described starting from the last dimension and moving forward.
        :math:`\left\lfloor\frac{\text{len(pad)}}{2}\right\rfloor` dimensions
        of ``input`` will be padded.
        For example, to pad only the last dimension of the input tensor, then
        :attr:`pad` has the form
        :math:`(\text{padding\_left}, \text{padding\_right})`;
        to pad the last 2 dimensions of the input tensor, then use
        :math:`(\text{padding\_left}, \text{padding\_right},`
        :math:`\text{padding\_top}, \text{padding\_bottom})`;
        to pad the last 3 dimensions, use
        :math:`(\text{padding\_left}, \text{padding\_right},`
        :math:`\text{padding\_top}, \text{padding\_bottom},`
        :math:`\text{padding\_front}, \text{padding\_back})`.

    Padding mode:
        See :class:`torch.nn.ConstantPad2d`, :class:`torch.nn.ReflectionPad2d`, and
        :class:`torch.nn.ReplicationPad2d` for concrete examples on how each of the
        padding modes works. Constant padding is implemented for arbitrary dimensions.
        Replicate padding is implemented for padding the last 3 dimensions of 5D input
        tensor, or the last 2 dimensions of 4D input tensor, or the last dimension of
        3D input tensor. Reflect padding is only implemented for padding the last 2
        dimensions of 4D input tensor, or the last dimension of 3D input tensor.
        Circular padding is accepted for 3D, 4D and 5D input tensors.

    .. include:: cuda_deterministic_backward.rst

    Args:
        input (Tensor): N-dimensional tensor
        pad (tuple): m-elements tuple, where
            :math:`\frac{m}{2} \leq` input dimensions and :math:`m` is even.
        mode: ``'constant'``, ``'reflect'``, ``'replicate'`` or ``'circular'``.
            Default: ``'constant'``
        value: fill value for ``'constant'`` padding. Default: ``0``

    Raises:
        AssertionError: if ``len(pad)`` is odd or too large for :attr:`input`,
            if a non-zero :attr:`value` is given with a non-constant mode, or
            if ``len(pad)`` does not match the input dimensionality for a
            non-constant mode.
        NotImplementedError: if :attr:`mode` is unknown or not implemented for
            the dimensionality of :attr:`input`.

    Examples::

        >>> t4d = torch.empty(3, 3, 4, 2)
        >>> p1d = (1, 1) # pad last dim by 1 on each side
        >>> out = F.pad(t4d, p1d, "constant", 0)  # effectively zero padding
        >>> print(out.data.size())
        torch.Size([3, 3, 4, 4])
        >>> p2d = (1, 1, 2, 2) # pad last dim by (1, 1) and 2nd to last by (2, 2)
        >>> out = F.pad(t4d, p2d, "constant", 0)
        >>> print(out.data.size())
        torch.Size([3, 3, 8, 4])
        >>> t4d = torch.empty(3, 3, 4, 2)
        >>> p3d = (0, 1, 2, 1, 3, 3) # pad by (0, 1), (2, 1), and (3, 3)
        >>> out = F.pad(t4d, p3d, "constant", 0)
        >>> print(out.data.size())
        torch.Size([3, 9, 7, 3])

    """
    assert len(pad) % 2 == 0, 'Padding length must be divisible by 2'
    assert len(pad) // 2 <= input.dim(), 'Padding length too large'
    if mode == 'constant':
        ret = _VF.constant_pad_nd(input, pad, value)
    else:
        # Only constant padding has a fill value.
        assert value == 0, 'Padding mode "{}" doesn\'t take in value argument'.format(mode)
        if input.dim() == 3:
            assert len(pad) == 2, '3D tensors expect 2 values for padding'
            if mode == 'reflect':
                ret = torch._C._nn.reflection_pad1d(input, pad)
            elif mode == 'replicate':
                ret = torch._C._nn.replication_pad1d(input, pad)
            elif mode == 'circular':
                ret = pad_circular(input, pad)
            else:
                ret = input  # TODO: remove this when jit raise supports control flow
                raise NotImplementedError('Unsupported padding mode "{}" for 3D input'.format(mode))

        elif input.dim() == 4:
            assert len(pad) == 4, '4D tensors expect 4 values for padding'
            if mode == 'reflect':
                ret = torch._C._nn.reflection_pad2d(input, pad)
            elif mode == 'replicate':
                ret = torch._C._nn.replication_pad2d(input, pad)
            elif mode == 'circular':
                ret = pad_circular(input, pad)
            else:
                ret = input  # TODO: remove this when jit raise supports control flow
                raise NotImplementedError('Unsupported padding mode "{}" for 4D input'.format(mode))

        elif input.dim() == 5:
            assert len(pad) == 6, '5D tensors expect 6 values for padding'
            if mode == 'reflect':
                ret = input  # TODO: remove this when jit raise supports control flow
                raise NotImplementedError('Reflect padding is not implemented for 5D input')
            elif mode == 'replicate':
                ret = torch._C._nn.replication_pad3d(input, pad)
            elif mode == 'circular':
                ret = pad_circular(input, pad)
            else:
                ret = input  # TODO: remove this when jit raise supports control flow
                raise NotImplementedError('Unsupported padding mode "{}" for 5D input'.format(mode))
        else:
            ret = input  # TODO: remove this when jit raise supports control flow
            raise NotImplementedError("Only 3D, 4D, 5D padding with non-constant padding are supported for now")

    return ret
2795 
2796 # distance
2797 
2798 
@weak_script
def pairwise_distance(x1, x2, p=2., eps=1e-6, keepdim=False):
    # type: (Tensor, Tensor, float, float, bool) -> Tensor
    r"""
    Thin wrapper that forwards all arguments to :func:`torch.pairwise_distance`.

    See :class:`torch.nn.PairwiseDistance` for details
    """
    distance = torch.pairwise_distance(x1, x2, p, eps, keepdim)
    return distance
2806 
2807 
# Re-export ``torch.pdist`` with its user-facing documentation attached.
pdist = _add_docstr(torch.pdist, r"""
pdist(input, p=2) -> Tensor

Computes the p-norm distance between every pair of row vectors in the input.
This is identical to the upper triangular portion, excluding the diagonal, of
`torch.norm(input[:, None] - input, dim=2, p=p)`. This function will be faster
if the rows are contiguous.

If input has shape :math:`N \times M` then the output will have shape
:math:`\frac{1}{2} N (N - 1)`.

This function is equivalent to `scipy.spatial.distance.pdist(input,
'minkowski', p=p)` if :math:`p \in (0, \infty)`. When :math:`p = 0` it is
equivalent to `scipy.spatial.distance.pdist(input, 'hamming') * M`.
When :math:`p = \infty`, the closest scipy function is
`scipy.spatial.distance.pdist(input, lambda x, y: np.abs(x - y).max())`.

Args:
    input: input tensor of shape :math:`N \times M`.
    p: p value for the p-norm distance to calculate between each vector pair
        :math:`\in [0, \infty]`.
""")
2830 
2831 
# Re-export ``torch.cosine_similarity`` with its user-facing documentation attached.
cosine_similarity = _add_docstr(torch.cosine_similarity, r"""
cosine_similarity(x1, x2, dim=1, eps=1e-8) -> Tensor

Returns cosine similarity between x1 and x2, computed along dim.

.. math ::
    \text{similarity} = \dfrac{x_1 \cdot x_2}{\max(\Vert x_1 \Vert _2 \cdot \Vert x_2 \Vert _2, \epsilon)}

Args:
    x1 (Tensor): First input.
    x2 (Tensor): Second input (of size matching x1).
    dim (int, optional): Dimension of vectors. Default: 1
    eps (float, optional): Small value to avoid division by zero.
        Default: 1e-8

Shape:
    - Input: :math:`(\ast_1, D, \ast_2)` where D is at position `dim`.
    - Output: :math:`(\ast_1, \ast_2)`, i.e. the input shape with dimension `dim` removed.

Example::

    >>> input1 = torch.randn(100, 128)
    >>> input2 = torch.randn(100, 128)
    >>> output = F.cosine_similarity(input1, input2)
    >>> print(output)
""")
2858 
2859 
# Re-export ``torch._C._nn.one_hot`` with its user-facing documentation attached.
one_hot = _add_docstr(torch._C._nn.one_hot, r"""
one_hot(tensor, num_classes=-1) -> LongTensor

Takes LongTensor with index values of shape ``(*)`` and returns a tensor
of shape ``(*, num_classes)`` that have zeros everywhere except where the
index of last dimension matches the corresponding value of the input tensor,
in which case it will be 1.

See also `One-hot on Wikipedia`_ .

.. _One-hot on Wikipedia:
    https://en.wikipedia.org/wiki/One-hot

Arguments:
    tensor (LongTensor): class values of any shape.
    num_classes (int):  Total number of classes. If set to -1, the number
        of classes will be inferred as one greater than the largest class
        value in the input tensor.

Returns:
    LongTensor that has one more dimension with 1 values at the
    index of last dimension indicated by the input, and 0 everywhere
    else.

Examples:
    >>> F.one_hot(torch.arange(0, 5) % 3)
    tensor([[1, 0, 0],
            [0, 1, 0],
            [0, 0, 1],
            [1, 0, 0],
            [0, 1, 0]])
    >>> F.one_hot(torch.arange(0, 5) % 3, num_classes=5)
    tensor([[1, 0, 0, 0, 0],
            [0, 1, 0, 0, 0],
            [0, 0, 1, 0, 0],
            [1, 0, 0, 0, 0],
            [0, 1, 0, 0, 0]])
    >>> F.one_hot(torch.arange(0, 6).view(3,2) % 3)
    tensor([[[1, 0, 0],
             [0, 1, 0]],
            [[0, 0, 1],
             [1, 0, 0]],
            [[0, 1, 0],
             [0, 0, 1]]])
""")
2905 
2906 
@weak_script
def triplet_margin_loss(anchor, positive, negative, margin=1.0, p=2, eps=1e-6, swap=False, size_average=None,
                        reduce=None, reduction="mean"):
    # type: (Tensor, Tensor, Tensor, float, float, float, bool, Optional[bool], Optional[bool], str) -> Tensor
    r"""
    Resolves the reduction mode and forwards to :func:`torch.triplet_margin_loss`.

    See :class:`~torch.nn.TripletMarginLoss` for details
    """
    # Legacy ``size_average`` / ``reduce`` win over ``reduction`` when given.
    if size_average is None and reduce is None:
        reduction_code = _Reduction.get_enum(reduction)
    else:
        reduction_code = _Reduction.legacy_get_enum(size_average, reduce)
    return torch.triplet_margin_loss(anchor, positive, negative, margin, p, eps,
                                     swap, reduction_code)
2920 
2921 
@weak_script
def normalize(input, p=2, dim=1, eps=1e-12, out=None):
    # type: (Tensor, float, int, float, Optional[Tensor]) -> Tensor
    r"""Performs :math:`L_p` normalization of inputs over specified dimension.

    For a tensor :attr:`input` of sizes :math:`(n_0, ..., n_{dim}, ..., n_k)`, each
    :math:`n_{dim}` -element vector :math:`v` along dimension :attr:`dim` is transformed as

    .. math::
        v = \frac{v}{\max(\lVert v \rVert_p, \epsilon)}.

    With the default arguments it uses the Euclidean norm over vectors along dimension :math:`1` for normalization.

    Args:
        input: input tensor of any shape
        p (float): the exponent value in the norm formulation. Default: 2
        dim (int): the dimension to reduce. Default: 1
        eps (float): small value to avoid division by zero. Default: 1e-12
        out (Tensor, optional): the output tensor. If :attr:`out` is used, this
                                operation won't be differentiable.
    """
    # The divisor is the same whether or not ``out`` is supplied: the p-norm
    # along ``dim`` (kept as a size-1 dimension), floored at ``eps`` and
    # expanded back to the shape of ``input``.
    norm_floor = input.norm(p, dim, True).clamp_min(eps).expand_as(input)
    if out is not None:
        ret = torch.div(input, norm_floor, out=out)
    else:
        ret = input / norm_floor
    return ret
2950 
2951 
def assert_int_or_pair(arg, arg_name, message):
    """Assert that ``arg`` is an ``int`` or a sized object of length 2.

    ``message`` is a format string with a single ``{}`` placeholder that is
    filled with ``arg_name`` when the assertion fails.
    """
    if isinstance(arg, int):
        return
    assert len(arg) == 2, message.format(arg_name)
2954 
2955 
@weak_script
def unfold(input, kernel_size, dilation=1, padding=0, stride=1):
    # type: (Tensor, BroadcastingList2[int], BroadcastingList2[int], BroadcastingList2[int], BroadcastingList2[int]) -> Tensor  # noqa
    r"""Extracts sliding local blocks from a batched input tensor.

    .. warning::
        Currently, only 4-D input tensors (batched image-like tensors) are
        supported.

    .. warning::

        More than one element of the unfolded tensor may refer to a single
        memory location. As a result, in-place operations (especially ones that
        are vectorized) may result in incorrect behavior. If you need to write
        to the tensor, please clone it first.


    See :class:`torch.nn.Unfold` for details
    """

    if input.dim() == 4:
        msg = '{} must be int or 2-tuple for 4D input'
        assert_int_or_pair(kernel_size, 'kernel_size', msg)
        assert_int_or_pair(dilation, 'dilation', msg)
        assert_int_or_pair(padding, 'padding', msg)
        assert_int_or_pair(stride, 'stride', msg)

        ret = torch._C._nn.thnn_im2col(input, _pair(kernel_size),
                                       _pair(dilation), _pair(padding), _pair(stride))
    else:
        # Placeholder comes before the raise (as in ``pad``) so that it is not
        # dead code; it only exists to define ``ret`` on this branch.
        ret = input  # TODO: remove when jit supports exception control flow
        raise NotImplementedError("Input Error: Only 4D input Tensors are supported (got {}D)".format(input.dim()))
    return ret
2989 
2990 
@weak_script
def fold(input, output_size, kernel_size, dilation=1, padding=0, stride=1):
    # type: (Tensor, BroadcastingList2[int], BroadcastingList2[int], BroadcastingList2[int], BroadcastingList2[int], BroadcastingList2[int]) -> Tensor  # noqa
    r"""Combines an array of sliding local blocks into a large containing
    tensor.

    .. warning::
        Currently, only 4-D output tensors (batched image-like tensors) are
        supported. The :attr:`input` itself must therefore be 3-D.

    See :class:`torch.nn.Fold` for details
    """
    if input.dim() == 3:
        msg = '{} must be int or 2-tuple for 3D input'
        assert_int_or_pair(output_size, 'output_size', msg)
        assert_int_or_pair(kernel_size, 'kernel_size', msg)
        assert_int_or_pair(dilation, 'dilation', msg)
        assert_int_or_pair(padding, 'padding', msg)
        assert_int_or_pair(stride, 'stride', msg)

        ret = torch._C._nn.thnn_col2im(input, _pair(output_size), _pair(kernel_size),
                                       _pair(dilation), _pair(padding), _pair(stride))
    else:
        # Placeholder comes before the raise (as in ``pad``) so that it is not
        # dead code; it only exists to define ``ret`` on this branch.
        ret = input  # TODO: remove when jit supports exception control flow
        raise NotImplementedError("Input Error: Only 3D input Tensors are supported (got {}D)".format(input.dim()))
    return ret
3017 
3018 
@weak_script
def pad_circular(input, padding):
    # type: (Tensor, List[int]) -> Tensor
    r"""Pads :attr:`input` circularly (wrap-around) along dimensions 2 and up.

    Arguments:
        input: tensor whose dimensions from index 2 onwards are padded,
            e.g. of shape :math:`(N, C_{\text{in}}, H, [W, D])`
        padding: list of ints read in pairs from its end:
            ``padding[-2]`` / ``padding[-1]`` are the amounts added before /
            after dimension 2, ``padding[-4]`` / ``padding[-3]`` apply to
            dimension 3 (when more than 2 values are given) and
            ``padding[-6]`` / ``padding[-5]`` to dimension 4 (when more than
            4 values are given).

    Returns:
        tensor in which every padded dimension has grown by the sum of its two
        padding amounts; the leading part is filled with the last elements of
        the original and the trailing part with its first elements.

    .. note::
        Padding amounts are not validated here and are assumed not to exceed
        the size of the dimension they wrap around.
    """
    # Both wrap-around pieces are sliced from the un-padded tensor using
    # explicit non-negative bounds. Ending a slice at ``-padding[-1]`` would
    # collapse to an empty slice when that amount is 0 (``-0 == 0``) and
    # silently drop the leading padding.
    extent = input.size(2)
    input = torch.cat([input[:, :, extent - padding[-2]:extent],
                       input,
                       input[:, :, 0:padding[-1]]], dim=2)

    if len(padding) > 2:
        extent = input.size(3)
        input = torch.cat([input[:, :, :, extent - padding[-4]:extent],
                           input,
                           input[:, :, :, 0:padding[-3]]], dim=3)

    if len(padding) > 4:
        extent = input.size(4)
        input = torch.cat([input[:, :, :, :, extent - padding[-6]:extent],
                           input,
                           input[:, :, :, :, 0:padding[-5]]], dim=4)

    return input
def _unwrap_optional(x)
Definition: __init__.py:1492
def annotate(the_type, the_value)
Definition: __init__.py:1560
Module caffe2.python.helpers.dropout.
def boolean_dispatch(arg_name, arg_index, default, if_true, if_false, module_name, func_name)