r"""Functional interface"""
from __future__ import division

import torch
from torch._C import _infer_size, _add_docstr
from . import _reduction as _Reduction
from . import _functions
from .modules import utils
from ._functions import vision
from .modules.utils import _single, _pair, _triple, _list_with_default
from . import _VF
from .._jit_internal import weak_script, List
    20 conv1d = _add_docstr(torch.conv1d, 
r"""    21 conv1d(input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1, padding_mode='zeros') -> Tensor    23 Applies a 1D convolution over an input signal composed of several input    26 See :class:`~torch.nn.Conv1d` for details and output shape.    28 .. include:: cudnn_deterministic.rst    31     input: input tensor of shape :math:`(\text{minibatch} , \text{in\_channels} , iW)`    32     weight: filters of shape :math:`(\text{out\_channels} , \frac{\text{in\_channels}}{\text{groups}} , kW)`    33     bias: optional bias of shape :math:`(\text{out\_channels})`. Default: ``None``    34     stride: the stride of the convolving kernel. Can be a single number or    35       a one-element tuple `(sW,)`. Default: 1    36     padding: implicit paddings on both sides of the input. Can be a    37       single number or a one-element tuple `(padW,)`. Default: 0    38     dilation: the spacing between kernel elements. Can be a single number or    39       a one-element tuple `(dW,)`. Default: 1    40     groups: split input into groups, :math:`\text{in\_channels}` should be divisible by    41       the number of groups. Default: 1    42     padding_mode: the type of paddings applied to both sided can be: `zeros` or `circular`. Default: `zeros`    46     >>> filters = torch.randn(33, 16, 3)    47     >>> inputs = torch.randn(20, 16, 50)    48     >>> F.conv1d(inputs, filters)    51 conv2d = _add_docstr(torch.conv2d, 
r"""    52 conv2d(input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1, padding_mode='zeros') -> Tensor    54 Applies a 2D convolution over an input image composed of several input    57 See :class:`~torch.nn.Conv2d` for details and output shape.    59 .. include:: cudnn_deterministic.rst    62     input: input tensor of shape :math:`(\text{minibatch} , \text{in\_channels} , iH , iW)`    63     weight: filters of shape :math:`(\text{out\_channels} , \frac{\text{in\_channels}}{\text{groups}} , kH , kW)`    64     bias: optional bias tensor of shape :math:`(\text{out\_channels})`. Default: ``None``    65     stride: the stride of the convolving kernel. Can be a single number or a    66       tuple `(sH, sW)`. Default: 1    67     padding: implicit paddings on both sides of the input. Can be a    68       single number or a tuple `(padH, padW)`. Default: 0    69     dilation: the spacing between kernel elements. Can be a single number or    70       a tuple `(dH, dW)`. Default: 1    71     groups: split input into groups, :math:`\text{in\_channels}` should be divisible by the    72       number of groups. Default: 1    73     padding_mode: the type of paddings applied to both sided can be: `zeros` or `circular`. Default: `zeros`    77     >>> # With square kernels and equal stride    78     >>> filters = torch.randn(8,4,3,3)    79     >>> inputs = torch.randn(1,4,5,5)    80     >>> F.conv2d(inputs, filters, padding=1)    83 conv3d = _add_docstr(torch.conv3d, 
r"""    84 conv3d(input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1, padding_mode='zeros') -> Tensor    86 Applies a 3D convolution over an input image composed of several input    89 See :class:`~torch.nn.Conv3d` for details and output shape.    91 .. include:: cudnn_deterministic.rst    94     input: input tensor of shape :math:`(\text{minibatch} , \text{in\_channels} , iT , iH , iW)`    95     weight: filters of shape :math:`(\text{out\_channels} , \frac{\text{in\_channels}}{\text{groups}} , kT , kH , kW)`    96     bias: optional bias tensor of shape :math:`(\text{out\_channels})`. Default: None    97     stride: the stride of the convolving kernel. Can be a single number or a    98       tuple `(sT, sH, sW)`. Default: 1    99     padding: implicit paddings on both sides of the input. Can be a   100       single number or a tuple `(padT, padH, padW)`. Default: 0   101     dilation: the spacing between kernel elements. Can be a single number or   102       a tuple `(dT, dH, dW)`. Default: 1   103     groups: split input into groups, :math:`\text{in\_channels}` should be divisible by   104       the number of groups. Default: 1   105     padding_mode: the type of paddings applied to both sided can be: `zeros` or `circular`. Default: `zeros`   109     >>> filters = torch.randn(33, 16, 3, 3, 3)   110     >>> inputs = torch.randn(20, 16, 50, 10, 20)   111     >>> F.conv3d(inputs, filters)   114 conv_transpose1d = _add_docstr(torch.conv_transpose1d, 
r"""   115 conv_transpose1d(input, weight, bias=None, stride=1, padding=0, output_padding=0, groups=1, dilation=1) -> Tensor   117 Applies a 1D transposed convolution operator over an input signal   118 composed of several input planes, sometimes also called "deconvolution".   120 See :class:`~torch.nn.ConvTranspose1d` for details and output shape.   122 .. include:: cudnn_deterministic.rst   125     input: input tensor of shape :math:`(\text{minibatch} , \text{in\_channels} , iW)`   126     weight: filters of shape :math:`(\text{in\_channels} , \frac{\text{out\_channels}}{\text{groups}} , kW)`   127     bias: optional bias of shape :math:`(\text{out\_channels})`. Default: None   128     stride: the stride of the convolving kernel. Can be a single number or a   129       tuple ``(sW,)``. Default: 1   130     padding: ``dilation * (kernel_size - 1) - padding`` zero-padding will be added to both   131       sides of each dimension in the input. Can be a single number or a tuple   132       ``(padW,)``. Default: 0   133     output_padding: additional size added to one side of each dimension in the   134       output shape. Can be a single number or a tuple ``(out_padW)``. Default: 0   135     groups: split input into groups, :math:`\text{in\_channels}` should be divisible by the   136       number of groups. Default: 1   137     dilation: the spacing between kernel elements. Can be a single number or   138       a tuple ``(dW,)``. Default: 1   142     >>> inputs = torch.randn(20, 16, 50)   143     >>> weights = torch.randn(16, 33, 5)   144     >>> F.conv_transpose1d(inputs, weights)   147 conv_transpose2d = _add_docstr(torch.conv_transpose2d, 
r"""   148 conv_transpose2d(input, weight, bias=None, stride=1, padding=0, output_padding=0, groups=1, dilation=1) -> Tensor   150 Applies a 2D transposed convolution operator over an input image   151 composed of several input planes, sometimes also called "deconvolution".   153 See :class:`~torch.nn.ConvTranspose2d` for details and output shape.   155 .. include:: cudnn_deterministic.rst   158     input: input tensor of shape :math:`(\text{minibatch} , \text{in\_channels} , iH , iW)`   159     weight: filters of shape :math:`(\text{in\_channels} , \frac{\text{out\_channels}}{\text{groups}} , kH , kW)`   160     bias: optional bias of shape :math:`(\text{out\_channels})`. Default: None   161     stride: the stride of the convolving kernel. Can be a single number or a   162       tuple ``(sH, sW)``. Default: 1   163     padding: ``dilation * (kernel_size - 1) - padding`` zero-padding will be added to both   164       sides of each dimension in the input. Can be a single number or a tuple   165       ``(padH, padW)``. Default: 0   166     output_padding: additional size added to one side of each dimension in the   167       output shape. Can be a single number or a tuple ``(out_padH, out_padW)``.   169     groups: split input into groups, :math:`\text{in\_channels}` should be divisible by the   170       number of groups. Default: 1   171     dilation: the spacing between kernel elements. Can be a single number or   172       a tuple ``(dH, dW)``. Default: 1   176     >>> # With square kernels and equal stride   177     >>> inputs = torch.randn(1, 4, 5, 5)   178     >>> weights = torch.randn(4, 8, 3, 3)   179     >>> F.conv_transpose2d(inputs, weights, padding=1)   182 conv_transpose3d = _add_docstr(torch.conv_transpose3d, 
r"""   183 conv_transpose3d(input, weight, bias=None, stride=1, padding=0, output_padding=0, groups=1, dilation=1) -> Tensor   185 Applies a 3D transposed convolution operator over an input image   186 composed of several input planes, sometimes also called "deconvolution"   188 See :class:`~torch.nn.ConvTranspose3d` for details and output shape.   190 .. include:: cudnn_deterministic.rst   193     input: input tensor of shape :math:`(\text{minibatch} , \text{in\_channels} , iT , iH , iW)`   194     weight: filters of shape :math:`(\text{in\_channels} , \frac{\text{out\_channels}}{\text{groups}} , kT , kH , kW)`   195     bias: optional bias of shape :math:`(\text{out\_channels})`. Default: None   196     stride: the stride of the convolving kernel. Can be a single number or a   197       tuple ``(sT, sH, sW)``. Default: 1   198     padding: ``dilation * (kernel_size - 1) - padding`` zero-padding will be added to both   199       sides of each dimension in the input. Can be a single number or a tuple   200       ``(padT, padH, padW)``. Default: 0   201     output_padding: additional size added to one side of each dimension in the   202       output shape. Can be a single number or a tuple   203       ``(out_padT, out_padH, out_padW)``. Default: 0   204     groups: split input into groups, :math:`\text{in\_channels}` should be divisible by the   205       number of groups. Default: 1   206     dilation: the spacing between kernel elements. Can be a single number or   207       a tuple `(dT, dH, dW)`. Default: 1   211     >>> inputs = torch.randn(20, 16, 50, 10, 20)   212     >>> weights = torch.randn(16, 33, 3, 3, 3)   213     >>> F.conv_transpose3d(inputs, weights)   216 conv_tbc = _add_docstr(torch.conv_tbc, 
r"""   217 Applies a 1-dimensional sequence convolution over an input sequence.   218 Input and output dimensions are (Time, Batch, Channels) - hence TBC.   221     input: input tensor of shape :math:`(\text{sequence length} \times batch \times \text{in\_channels})`   222     weight: filter of shape (:math:`\text{kernel width} \times \text{in\_channels} \times \text{out\_channels}`)   223     bias: bias of shape (:math:`\text{out\_channels}`)   224     pad: number of timesteps to pad. Default: 0   229 avg_pool1d = _add_docstr(torch.avg_pool1d, 
r"""   230 avg_pool1d(input, kernel_size, stride=None, padding=0, ceil_mode=False, count_include_pad=True) -> Tensor   232 Applies a 1D average pooling over an input signal composed of several   235 See :class:`~torch.nn.AvgPool1d` for details and output shape.   238     input: input tensor of shape :math:`(\text{minibatch} , \text{in\_channels} , iW)`   239     kernel_size: the size of the window. Can be a single number or a   241     stride: the stride of the window. Can be a single number or a tuple   242       `(sW,)`. Default: :attr:`kernel_size`   243     padding: implicit zero paddings on both sides of the input. Can be a   244       single number or a tuple `(padW,)`. Default: 0   245     ceil_mode: when True, will use `ceil` instead of `floor` to compute the   246         output shape. Default: ``False``   247     count_include_pad: when True, will include the zero-padding in the   248         averaging calculation. Default: ``True``   252     >>> # pool of square window of size=3, stride=2   253     >>> input = torch.tensor([[[1, 2, 3, 4, 5, 6, 7]]], dtype=torch.float32)   254     >>> F.avg_pool1d(input, kernel_size=3, stride=2)   255     tensor([[[ 2.,  4.,  6.]]])   260 avg_pool2d = _add_docstr(torch._C._nn.avg_pool2d, 
r"""   261 avg_pool2d(input, kernel_size, stride=None, padding=0, ceil_mode=False, count_include_pad=True) -> Tensor   263 Applies 2D average-pooling operation in :math:`kH \times kW` regions by step size   264 :math:`sH \times sW` steps. The number of output features is equal to the number of   267 See :class:`~torch.nn.AvgPool2d` for details and output shape.   270     input: input tensor :math:`(\text{minibatch} , \text{in\_channels} , iH , iW)`   271     kernel_size: size of the pooling region. Can be a single number or a   273     stride: stride of the pooling operation. Can be a single number or a   274       tuple `(sH, sW)`. Default: :attr:`kernel_size`   275     padding: implicit zero paddings on both sides of the input. Can be a   276       single number or a tuple `(padH, padW)`. Default: 0   277     ceil_mode: when True, will use `ceil` instead of `floor` in the formula   278         to compute the output shape. Default: ``False``   279     count_include_pad: when True, will include the zero-padding in the   280         averaging calculation. Default: ``True``   283 avg_pool3d = _add_docstr(torch._C._nn.avg_pool3d, 
r"""   284 avg_pool3d(input, kernel_size, stride=None, padding=0, ceil_mode=False, count_include_pad=True) -> Tensor   286 Applies 3D average-pooling operation in :math:`kT \times kH \times kW` regions by step   287 size :math:`sT \times sH \times sW` steps. The number of output features is equal to   288 :math:`\lfloor\frac{\text{input planes}}{sT}\rfloor`.   290 See :class:`~torch.nn.AvgPool3d` for details and output shape.   293     input: input tensor :math:`(\text{minibatch} , \text{in\_channels} , iT \times iH , iW)`   294     kernel_size: size of the pooling region. Can be a single number or a   296     stride: stride of the pooling operation. Can be a single number or a   297       tuple `(sT, sH, sW)`. Default: :attr:`kernel_size`   298     padding: implicit zero paddings on both sides of the input. Can be a   299       single number or a tuple `(padT, padH, padW)`, Default: 0   300     ceil_mode: when True, will use `ceil` instead of `floor` in the formula   301         to compute the output shape   302     count_include_pad: when True, will include the zero-padding in the   303         averaging calculation   308 def fractional_max_pool2d_with_indices(input, kernel_size, output_size=None,
   309                                        output_ratio=
None, return_indices=
False,
   310                                        _random_samples=
None):
   312     r"""Applies 2D fractional max pooling over an input signal composed of several input planes.   314     Fractional MaxPooling is described in detail in the paper `Fractional MaxPooling`_ by Ben Graham   316     The max-pooling operation is applied in :math:`kH \times kW` regions by a stochastic   317     step size determined by the target output size.   318     The number of output features is equal to the number of input planes.   321         kernel_size: the size of the window to take a max over.   322                      Can be a single number :math:`k` (for a square kernel of :math:`k \times k`)   323                      or a tuple `(kH, kW)`   324         output_size: the target output size of the image of the form :math:`oH \times oW`.   325                      Can be a tuple `(oH, oW)` or a single number :math:`oH` for a square image :math:`oH \times oH`   326         output_ratio: If one wants to have an output size as a ratio of the input size, this option can be given.   327                       This has to be a number or tuple in the range (0, 1)   328         return_indices: if ``True``, will return the indices along with the outputs.   329                         Useful to pass to :func:`~torch.nn.functional.max_unpool2d`.   332         >>> input = torch.randn(20, 16, 50, 32)   333         >>> # pool of square window of size=3, and target output size 13x12   334         >>> F.fractional_max_pool2d(input, 3, output_size=(13, 12))   335         >>> # pool of square window and target output size being half of input image size   336         >>> F.fractional_max_pool2d(input, 3, output_ratio=(0.5, 0.5))   338     .. _Fractional MaxPooling:   339         http://arxiv.org/abs/1412.6071   341     if output_size 
is None and output_ratio 
is None:
   342         raise ValueError(
"fractional_max_pool2d requires specifying either "   343                          "an output_size or an output_ratio")
   344     if output_size 
is None:
   346         output_size = [int(input.size(2) * _output_ratio[0]),
   347                        int(input.size(3) * _output_ratio[1])]
   349     if _random_samples 
is None:
   350         _random_samples = torch.rand(input.size(0), input.size(1), 2, dtype=input.dtype, device=input.device)
   351     return torch._C._nn.fractional_max_pool2d(input, kernel_size, output_size, _random_samples)
   355 def _fractional_max_pool2d(input, kernel_size, output_size=None,
   356                            output_ratio=
None, return_indices=
False,
   357                            _random_samples=
None):
   359     return fractional_max_pool2d_with_indices(input, kernel_size, output_size,
   360                                               output_ratio, return_indices,
   364     arg_name=
'return_indices',
   367     if_true=fractional_max_pool2d_with_indices,
   368     if_false=_fractional_max_pool2d,
   369     module_name=__name__,
   370     func_name=
'fractional_max_pool2d')
   374 def fractional_max_pool3d_with_indices(input, kernel_size, output_size=None,
   375                                        output_ratio=
None, return_indices=
False,
   376                                        _random_samples=
None):
   378     r"""Applies 3D fractional max pooling over an input signal composed of several input planes.   380     Fractional MaxPooling is described in detail in the paper `Fractional MaxPooling`_ by Ben Graham   382     The max-pooling operation is applied in :math:`kT \times kH \times kW` regions by a stochastic   383     step size determined by the target output size.   384     The number of output features is equal to the number of input planes.   387         kernel_size: the size of the window to take a max over.   388                      Can be a single number :math:`k` (for a square kernel of :math:`k \times k \times k`)   389                      or a tuple `(kT, kH, kW)`   390         output_size: the target output size of the form :math:`oT \times oH \times oW`.   391                      Can be a tuple `(oT, oH, oW)` or a single number :math:`oH` for a cubic output   392                       :math:`oH \times oH \times oH`   393         output_ratio: If one wants to have an output size as a ratio of the input size, this option can be given.   394                       This has to be a number or tuple in the range (0, 1)   395         return_indices: if ``True``, will return the indices along with the outputs.   396                         Useful to pass to :func:`~torch.nn.functional.max_unpool3d`.   399         >>> input = torch.randn(20, 16, 50, 32, 16)   400         >>> # pool of cubic window of size=3, and target output size 13x12x11   401         >>> F.fractional_max_pool3d(input, 3, output_size=(13, 12, 11))   402         >>> # pool of cubic window and target output size being half of input size   403         >>> F.fractional_max_pool3d(input, 3, output_ratio=(0.5, 0.5, 0.5))   405     .. _Fractional MaxPooling:   406         http://arxiv.org/abs/1412.6071   408     if output_size 
is None and output_ratio 
is None:
   409         raise ValueError(
"fractional_max_pool3d requires specifying either "   410                          "an output_size or an output_ratio")
   411     if output_size 
is None:
   413         output_size = [int(input.size(2) * _output_ratio[0]),
   414                        int(input.size(3) * _output_ratio[1]),
   415                        int(input.size(4) * _output_ratio[2])]
   417     if _random_samples 
is None:
   418         _random_samples = torch.rand(input.size(0), input.size(1), 3, dtype=input.dtype, device=input.device)
   419     return torch._C._nn.fractional_max_pool3d(input, kernel_size, output_size, _random_samples)
   423 def _fractional_max_pool3d(input, kernel_size, output_size=None,
   424                            output_ratio=
None, return_indices=
False,
   425                            _random_samples=
None):
   427     return fractional_max_pool3d_with_indices(input, kernel_size, output_size,
   428                                               output_ratio, return_indices,
   432     arg_name=
'return_indices',
   435     if_true=fractional_max_pool3d_with_indices,
   436     if_false=_fractional_max_pool3d,
   437     module_name=__name__,
   438     func_name=
'fractional_max_pool3d')
   442 def max_pool1d_with_indices(input, kernel_size, stride=None, padding=0,
   443                             dilation=1, ceil_mode=
False, return_indices=
False):
   445     r"""Applies a 1D max pooling over an input signal composed of several input   448     See :class:`~torch.nn.MaxPool1d` for details.   452     return torch.max_pool1d_with_indices(
   453         input, kernel_size, stride, padding, dilation, ceil_mode)
   457 def _max_pool1d(input, kernel_size, stride=None, padding=0, dilation=1,
   458                 ceil_mode=
False, return_indices=
False):
   460     return max_pool1d_with_indices(
   461         input, kernel_size, stride, padding, dilation, ceil_mode)[0]
   464     arg_name=
'return_indices',
   467     if_true=max_pool1d_with_indices,
   468     if_false=_max_pool1d,
   469     module_name=__name__,
   470     func_name=
'max_pool1d')
   474 def max_pool2d_with_indices(input, kernel_size, stride=None, padding=0, dilation=1,
   475                             ceil_mode=
False, return_indices=
False):
   477     r"""Applies a 2D max pooling over an input signal composed of several input   480     See :class:`~torch.nn.MaxPool2d` for details.   484     return torch._C._nn.max_pool2d_with_indices(input, kernel_size, stride, padding, dilation, ceil_mode)
   488 def _max_pool2d(input, kernel_size, stride=None, padding=0, dilation=1,
   489                 ceil_mode=
False, return_indices=
False):
   491     return max_pool2d_with_indices(
   492         input, kernel_size, stride, padding, dilation, ceil_mode)[0]
   495     arg_name=
'return_indices',
   498     if_true=max_pool2d_with_indices,
   499     if_false=_max_pool2d,
   500     module_name=__name__,
   501     func_name=
'max_pool2d')
   505 def max_pool3d_with_indices(input, kernel_size, stride=None, padding=0,
   506                             dilation=1, ceil_mode=
False, return_indices=
False):
   508     r"""Applies a 3D max pooling over an input signal composed of several input   511     See :class:`~torch.nn.MaxPool3d` for details.   515     return torch._C._nn.max_pool3d_with_indices(
   516         input, kernel_size, stride, padding, dilation, ceil_mode)
   520 def _max_pool3d(input, kernel_size, stride=None, padding=0, dilation=1,
   521                 ceil_mode=
False, return_indices=
False):
   523     return max_pool3d_with_indices(
   524         input, kernel_size, stride, padding, dilation, ceil_mode)[0]
   527     arg_name=
'return_indices',
   530     if_true=max_pool3d_with_indices,
   531     if_false=_max_pool3d,
   532     module_name=__name__,
   533     func_name=
'max_pool3d')
   537 def _unpool_output_size(input, kernel_size, stride, padding, output_size):
   539     input_size = input.size()
   541     for d 
in range(len(kernel_size)):
   542         default_size.append((input_size[d + 2] - 1) * stride[d] +
   543                             kernel_size[d] - 2 * padding[d])
   544     if output_size 
is None:
   547         if len(output_size) == len(kernel_size) + 2:
   548             output_size = output_size[2:]
   549         if len(output_size) != len(kernel_size):
   550             raise ValueError(
"output_size should be a sequence containing "   551                              "{} or {} elements, but it has a length of '{}'"   552                              .format(len(kernel_size), len(kernel_size) + 2,
   554         for d 
in range(len(kernel_size)):
   555             min_size = default_size[d] - stride[d]
   556             max_size = default_size[d] + stride[d]
   557             if not (min_size < output_size[d] < max_size):
   559                     'invalid output_size "{}" (dim {} must be between {} and {})'   560                     .format(output_size, d, min_size, max_size))
   567 def max_unpool1d(input, indices, kernel_size, stride=None, padding=0,
   570     r"""Computes a partial inverse of :class:`MaxPool1d`.   572     See :class:`~torch.nn.MaxUnpool1d` for details.   574     kernel_size = _single(kernel_size)
   575     if stride 
is not None:
   576         _stride = _single(stride)
   578         _stride = kernel_size
   579     padding = _single(padding)
   580     output_size = _unpool_output_size(input, kernel_size, _stride, padding,
   582     if isinstance(output_size, list):
   583         output_size = output_size + [1]
   585         output_size = output_size + (1,)
   586     return torch._C._nn.max_unpool2d(input.unsqueeze(3), indices.unsqueeze(3),
   587                                      output_size).squeeze(3)
   591 def max_unpool2d(input, indices, kernel_size, stride=None, padding=0,
   594     r"""Computes a partial inverse of :class:`MaxPool2d`.   596     See :class:`~torch.nn.MaxUnpool2d` for details.   598     kernel_size = _pair(kernel_size)
   599     if stride 
is not None:
   600         _stride = _pair(stride)
   602         _stride = kernel_size
   603     padding = _pair(padding)
   604     output_size = _unpool_output_size(input, kernel_size, _stride, padding,
   606     return torch._C._nn.max_unpool2d(input, indices, output_size)
   610 def max_unpool3d(input, indices, kernel_size, stride=None, padding=0,
   613     r"""Computes a partial inverse of :class:`MaxPool3d`.   615     See :class:`~torch.nn.MaxUnpool3d` for details.   617     kernel_size = _triple(kernel_size)
   618     if stride 
is not None:
   619         _stride = _triple(stride)
   621         _stride = kernel_size
   622     padding = _triple(padding)
   623     output_size = _unpool_output_size(input, kernel_size, _stride, padding,
   625     return torch._C._nn.max_unpool3d(
   626         input, indices, output_size, _stride, padding)
   630 def lp_pool2d(input, norm_type, kernel_size, stride=None, ceil_mode=False):
   632     r"""Applies a 2D power-average pooling over an input signal composed of   633     several input planes. If the sum of all inputs to the power of `p` is   634     zero, the gradient is set to zero as well.   636     See :class:`~torch.nn.LPPool2d` for details.   638     kw, kh = utils._pair(kernel_size)
   639     if stride 
is not None:
   640         out = avg_pool2d(input.pow(norm_type), kernel_size, stride, 0, ceil_mode)
   642         out = avg_pool2d(input.pow(norm_type), kernel_size, padding=0, ceil_mode=ceil_mode)
   644     return (torch.sign(out) * relu(torch.abs(out))).mul(kw * kh).pow(1. / norm_type)
   648 def lp_pool1d(input, norm_type, kernel_size, stride=None, ceil_mode=False):
   650     r"""Applies a 1D power-average pooling over an input signal composed of   651     several input planes. If the sum of all inputs to the power of `p` is   652     zero, the gradient is set to zero as well.   654     See :class:`~torch.nn.LPPool1d` for details.   656     if stride 
is not None:
   657         out = avg_pool1d(input.pow(norm_type), kernel_size, stride, 0, ceil_mode)
   659         out = avg_pool1d(input.pow(norm_type), kernel_size, padding=0, ceil_mode=ceil_mode)
   661     return (torch.sign(out) * relu(torch.abs(out))).mul(kernel_size).pow(1. / norm_type)
   665 def adaptive_max_pool1d_with_indices(input, output_size, return_indices=False):
   667     r"""Applies a 1D adaptive max pooling over an input signal composed of   668     several input planes.   670     See :class:`~torch.nn.AdaptiveMaxPool1d` for details and output shape.   673         output_size: the target output size (single integer)   674         return_indices: whether to return pooling indices. Default: ``False``   676     return torch.adaptive_max_pool1d(input, output_size)
   680 def _adaptive_max_pool1d(input, output_size, return_indices=False):
   682     return adaptive_max_pool1d_with_indices(input, output_size)[0]
   685     arg_name=
'return_indices',
   688     if_true=adaptive_max_pool1d_with_indices,
   689     if_false=_adaptive_max_pool1d,
   690     module_name=__name__,
   691     func_name=
'adaptive_max_pool1d')
   695 def adaptive_max_pool2d_with_indices(input, output_size, return_indices=False):
   697     r"""Applies a 2D adaptive max pooling over an input signal composed of   698     several input planes.   700     See :class:`~torch.nn.AdaptiveMaxPool2d` for details and output shape.   703         output_size: the target output size (single integer or   704             double-integer tuple)   705         return_indices: whether to return pooling indices. Default: ``False``   707     output_size = _list_with_default(output_size, input.size())
   708     return torch._C._nn.adaptive_max_pool2d(input, output_size)
   712 def _adaptive_max_pool2d(input, output_size, return_indices=False):
   714     return adaptive_max_pool2d_with_indices(input, output_size)[0]
   717     arg_name=
'return_indices',
   720     if_true=adaptive_max_pool2d_with_indices,
   721     if_false=_adaptive_max_pool2d,
   722     module_name=__name__,
   723     func_name=
'adaptive_max_pool2d')
   727 def adaptive_max_pool3d_with_indices(input, output_size, return_indices=False):
   729     r"""Applies a 3D adaptive max pooling over an input signal composed of   730     several input planes.   732     See :class:`~torch.nn.AdaptiveMaxPool3d` for details and output shape.   735         output_size: the target output size (single integer or   736             triple-integer tuple)   737         return_indices: whether to return pooling indices. Default: ``False``   739     output_size = _list_with_default(output_size, input.size())
   740     return torch._C._nn.adaptive_max_pool3d(input, output_size)
   744 def _adaptive_max_pool3d(input, output_size, return_indices=False):
   746     return adaptive_max_pool3d_with_indices(input, output_size)[0]
   749     arg_name=
'return_indices',
   752     if_true=adaptive_max_pool3d_with_indices,
   753     if_false=_adaptive_max_pool3d,
   754     module_name=__name__,
   755     func_name=
'adaptive_max_pool3d')
   758 adaptive_avg_pool1d = _add_docstr(torch.adaptive_avg_pool1d, 
r"""   759 adaptive_avg_pool1d(input, output_size) -> Tensor   761 Applies a 1D adaptive average pooling over an input signal composed of   762 several input planes.   764 See :class:`~torch.nn.AdaptiveAvgPool1d` for details and output shape.   767     output_size: the target output size (single integer)   772 def adaptive_avg_pool2d(input, output_size):
   775     Applies a 2D adaptive average pooling over an input signal composed of   776     several input planes.   778     See :class:`~torch.nn.AdaptiveAvgPool2d` for details and output shape.   781         output_size: the target output size (single integer or   782             double-integer tuple)   784     _output_size = _list_with_default(output_size, input.size())
   785     return torch._C._nn.adaptive_avg_pool2d(input, _output_size)
   789 def adaptive_avg_pool3d(input, output_size):
   792     Applies a 3D adaptive average pooling over an input signal composed of   793     several input planes.   795     See :class:`~torch.nn.AdaptiveAvgPool3d` for details and output shape.   798         output_size: the target output size (single integer or   799             triple-integer tuple)   801     _output_size = _list_with_default(output_size, input.size())
   802     return torch._C._nn.adaptive_avg_pool3d(input, _output_size)
   807 def dropout(input, p=0.5, training=True, inplace=False):
   810     During training, randomly zeroes some of the elements of the input   811     tensor with probability :attr:`p` using samples from a Bernoulli   814     See :class:`~torch.nn.Dropout` for details.   817         p: probability of an element to be zeroed. Default: 0.5   818         training: apply dropout if is ``True``. Default: ``True``   819         inplace: If set to ``True``, will do this operation in-place. Default: ``False``   822         raise ValueError(
"dropout probability has to be between 0 and 1, "   823                          "but got {}".format(p))
   824     return (_VF.dropout_(input, p, training)
   826             else _VF.dropout(input, p, training))
   830 def alpha_dropout(input, p=0.5, training=False, inplace=False):
   832     r"""Applies alpha dropout to the input.   834     See :class:`~torch.nn.AlphaDropout` for details.   837         raise ValueError(
"dropout probability has to be between 0 and 1, "   838                          "but got {}".format(p))
   839     return (_VF.alpha_dropout_(input, p, training)
   841             else _VF.alpha_dropout(input, p, training))
   845 def dropout2d(input, p=0.5, training=True, inplace=False):
   848     Randomly zero out entire channels (a channel is a 2D feature map,   849     e.g., the :math:`j`-th channel of the :math:`i`-th sample in the   850     batched input is a 2D tensor :math:`\text{input}[i, j]`) of the input tensor).   851     Each channel will be zeroed out independently on every forward call with   852     probability :attr:`p` using samples from a Bernoulli distribution.   854     See :class:`~torch.nn.Dropout2d` for details.   857         p: probability of a channel to be zeroed. Default: 0.5   858         training: apply dropout if is ``True``. Default: ``True``   859         inplace: If set to ``True``, will do this operation in-place. Default: ``False``   862         raise ValueError(
"dropout probability has to be between 0 and 1, "   863                          "but got {}".format(p))
   864     return (_VF.feature_dropout_(input, p, training)
   866             else _VF.feature_dropout(input, p, training))
   870 def dropout3d(input, p=0.5, training=True, inplace=False):
   873     Randomly zero out entire channels (a channel is a 3D feature map,   874     e.g., the :math:`j`-th channel of the :math:`i`-th sample in the   875     batched input is a 3D tensor :math:`\text{input}[i, j]`) of the input tensor).   876     Each channel will be zeroed out independently on every forward call with   877     probability :attr:`p` using samples from a Bernoulli distribution.   879     See :class:`~torch.nn.Dropout3d` for details.   882         p: probability of a channel to be zeroed. Default: 0.5   883         training: apply dropout if is ``True``. Default: ``True``   884         inplace: If set to ``True``, will do this operation in-place. Default: ``False``   889         raise ValueError(
"dropout probability has to be between 0 and 1, "   890                          "but got {}".format(p))
   891     return (_VF.feature_dropout_(input, p, training)
   893             else _VF.feature_dropout(input, p, training))
   897 def feature_alpha_dropout(input, p=0.5, training=False, inplace=False):
   900         raise ValueError(
"dropout probability has to be between 0 and 1, "   901                          "but got {}".format(p))
   902     return (_VF.feature_alpha_dropout_(input, p, training)
   904             else _VF.feature_alpha_dropout(input, p, training))
   908 def threshold(input, threshold, value, inplace=False):
   910     r"""Thresholds each element of the input Tensor.   912     See :class:`~torch.nn.Threshold` for more details.   915         result = _VF.threshold_(input, threshold, value)
   917         result = _VF.threshold(input, threshold, value)
   921 threshold_ = _add_docstr(_VF.threshold_, 
r"""   922 threshold_(input, threshold, value) -> Tensor   924 In-place version of :func:`~threshold`.   929 def relu(input, inplace=False):
   931     r"""relu(input, inplace=False) -> Tensor   933     Applies the rectified linear unit function element-wise. See   934     :class:`~torch.nn.ReLU` for more details.   937         result = torch.relu_(input)
   939         result = torch.relu(input)
   943 relu_ = _add_docstr(torch.relu_, 
r"""   944 relu_(input) -> Tensor   946 In-place version of :func:`~relu`.   951 def glu(input, dim=-1):
   954     glu(input, dim=-1) -> Tensor   956     The gated linear unit. Computes:   959         \text{GLU}(a, b) = a \otimes \sigma(b)   961     where `input` is split in half along `dim` to form `a` and `b`, :math:`\sigma`   962     is the sigmoid function and :math:`\otimes` is the element-wise product between matrices.   964     See `Language Modeling with Gated Convolutional Networks <https://arxiv.org/abs/1612.08083>`_.   967         input (Tensor): input tensor   968         dim (int): dimension on which to split the input. Default: -1   971         raise RuntimeError(
"glu does not suppport scalars because halving size must be even")
   972     return torch._C._nn.glu(input, dim)
   976 def hardtanh(input, min_val=-1., max_val=1., inplace=False):
   979     hardtanh(input, min_val=-1., max_val=1., inplace=False) -> Tensor   981     Applies the HardTanh function element-wise. See :class:`~torch.nn.Hardtanh` for more   985         result = torch._C._nn.hardtanh_(input, min_val, max_val)
   987         result = torch._C._nn.hardtanh(input, min_val, max_val)
   991 hardtanh_ = _add_docstr(torch._C._nn.hardtanh_, 
r"""   992 hardtanh_(input, min_val=-1., max_val=1.) -> Tensor   994 In-place version of :func:`~hardtanh`.   999 def relu6(input, inplace=False):
  1001     r"""relu6(input, inplace=False) -> Tensor  1003     Applies the element-wise function :math:`\text{ReLU6}(x) = \min(\max(0,x), 6)`.  1005     See :class:`~torch.nn.ReLU6` for more details.  1007     return hardtanh(input, 0., 6., inplace)
  1011 def elu(input, alpha=1., inplace=False):
  1013     r"""Applies element-wise,  1014     :math:`\text{ELU}(x) = \max(0,x) + \min(0, \alpha * (\exp(x) - 1))`.  1016     See :class:`~torch.nn.ELU` for more details.  1019         result = torch._C._nn.elu_(input, alpha)
  1021         result = torch._C._nn.elu(input, alpha)
  1025 elu_ = _add_docstr(torch._C._nn.elu_, 
r"""  1026 elu_(input, alpha=1.) -> Tensor  1028 In-place version of :func:`~elu`.  1033 def selu(input, inplace=False):
  1035     r"""selu(input, inplace=False) -> Tensor  1037     Applies element-wise,  1038     :math:`\text{SELU}(x) = scale * (\max(0,x) + \min(0, \alpha * (\exp(x) - 1)))`,  1039     with :math:`\alpha=1.6732632423543772848170429916717` and  1040     :math:`scale=1.0507009873554804934193349852946`.  1042     See :class:`~torch.nn.SELU` for more details.  1045         result = torch.selu_(input)
  1047         result = torch.selu(input)
  1051 selu_ = _add_docstr(torch.selu_, 
r"""  1052 selu_(input) -> Tensor  1054 In-place version of :func:`~selu`.  1059 def celu(input, alpha=1., inplace=False):
  1061     r"""celu(input, alpha=1., inplace=False) -> Tensor  1063     Applies element-wise,  1064     :math:`\text{CELU}(x) = \max(0,x) + \min(0, \alpha * (\exp(x/\alpha) - 1))`.  1066     See :class:`~torch.nn.CELU` for more details.  1069         result = torch.celu_(input, alpha)
  1071         result = torch.celu(input, alpha)
  1074 celu_ = _add_docstr(torch.celu_, 
r"""  1075 celu_(input, alpha=1.) -> Tensor  1077 In-place version of :func:`~celu`.  1082 def leaky_relu(input, negative_slope=0.01, inplace=False):
  1085     leaky_relu(input, negative_slope=0.01, inplace=False) -> Tensor  1087     Applies element-wise,  1088     :math:`\text{LeakyReLU}(x) = \max(0, x) + \text{negative\_slope} * \min(0, x)`  1090     See :class:`~torch.nn.LeakyReLU` for more details.  1093         result = torch._C._nn.leaky_relu_(input, negative_slope)
  1095         result = torch._C._nn.leaky_relu(input, negative_slope)
  1099 leaky_relu_ = _add_docstr(torch._C._nn.leaky_relu_, 
r"""  1100 leaky_relu_(input, negative_slope=0.01) -> Tensor  1102 In-place version of :func:`~leaky_relu`.  1107 def prelu(input, weight):
  1109     r"""prelu(input, weight) -> Tensor  1111     Applies element-wise the function  1112     :math:`\text{PReLU}(x) = \max(0,x) + \text{weight} * \min(0,x)` where weight is a  1113     learnable parameter.  1115     See :class:`~torch.nn.PReLU` for more details.  1117     return torch.prelu(input, weight)
  1121 def rrelu(input, lower=1. / 8, upper=1. / 3, training=False, inplace=False):
  1123     r"""rrelu(input, lower=1./8, upper=1./3, training=False, inplace=False) -> Tensor  1125     Randomized leaky ReLU.  1127     See :class:`~torch.nn.RReLU` for more details.  1130         result = torch.rrelu_(input, lower, upper, training)
  1132         result = torch.rrelu(input, lower, upper, training)
  1136 rrelu_ = _add_docstr(torch.rrelu_, 
r"""  1137 rrelu_(input, lower=1./8, upper=1./3, training=False) -> Tensor  1139 In-place version of :func:`~rrelu`.  1142 logsigmoid = _add_docstr(torch._C._nn.log_sigmoid, 
r"""  1143 logsigmoid(input) -> Tensor  1145 Applies element-wise :math:`\text{LogSigmoid}(x_i) = \log \left(\frac{1}{1 + \exp(-x_i)}\right)`  1147 See :class:`~torch.nn.LogSigmoid` for more details.  1152 def hardshrink(input, lambd=0.5):
  1155     hardshrink(input, lambd=0.5) -> Tensor  1157     Applies the hard shrinkage function element-wise  1159     See :class:`~torch.nn.Hardshrink` for more details.  1161     return torch.hardshrink(input, lambd)
  1165 def tanhshrink(input):
  1166     r"""tanhshrink(input) -> Tensor  1168     Applies element-wise, :math:`\text{Tanhshrink}(x) = x - \text{Tanh}(x)`  1170     See :class:`~torch.nn.Tanhshrink` for more details.  1172     return input - input.tanh()
  1176 def softsign(input):
  1177     r"""softsign(input) -> Tensor  1179     Applies element-wise, the function :math:`\text{SoftSign}(x) = \frac{x}{1 + |x|}`  1181     See :class:`~torch.nn.Softsign` for more details.  1183     return input / (input.abs() + 1)
  1186 softplus = _add_docstr(torch._C._nn.softplus, 
r"""  1187 softplus(input, beta=1, threshold=20) -> Tensor  1192 def _get_softmax_dim(name, ndim, stacklevel):
  1194     warnings.warn(
"Implicit dimension choice for {} has been deprecated. "  1195                   "Change the call to include dim=X as an argument.".format(name), stacklevel=stacklevel)
  1196     if ndim == 0 
or ndim == 1 
or ndim == 3:
  1204 def softmin(input, dim=None, _stacklevel=3, dtype=None):
  1206     r"""Applies a softmin function.  1208     Note that :math:`\text{Softmin}(x) = \text{Softmax}(-x)`. See softmax definition for mathematical formula.  1210     See :class:`~torch.nn.Softmin` for more details.  1213         input (Tensor): input  1214         dim (int): A dimension along which softmin will be computed (so every slice  1215             along dim will sum to 1).  1216         dtype (:class:`torch.dtype`, optional): the desired data type of returned tensor.  1217           If specified, the input tensor is casted to :attr:`dtype` before the operation  1218           is performed. This is useful for preventing data type overflows. Default: None.  1221         dim = _get_softmax_dim(
'softmin', input.dim(), _stacklevel)
  1223         ret = (-input).softmax(dim)
  1225         ret = (-input).softmax(dim, dtype=dtype)
  1230 def softmax(input, dim=None, _stacklevel=3, dtype=None):
  1232     r"""Applies a softmax function.  1234     Softmax is defined as:  1236     :math:`\text{Softmax}(x_{i}) = \frac{exp(x_i)}{\sum_j exp(x_j)}`  1238     It is applied to all slices along dim, and will re-scale them so that the elements  1239     lie in the range `[0, 1]` and sum to 1.  1241     See :class:`~torch.nn.Softmax` for more details.  1244         input (Tensor): input  1245         dim (int): A dimension along which softmax will be computed.  1246         dtype (:class:`torch.dtype`, optional): the desired data type of returned tensor.  1247           If specified, the input tensor is casted to :attr:`dtype` before the operation  1248           is performed. This is useful for preventing data type overflows. Default: None.  1251         This function doesn't work directly with NLLLoss,  1252         which expects the Log to be computed between the Softmax and itself.  1253         Use log_softmax instead (it's faster and has better numerical properties).  1257         dim = _get_softmax_dim(
'softmax', input.dim(), _stacklevel)
  1259         ret = input.softmax(dim)
  1261         ret = input.softmax(dim, dtype=dtype)
  1266 def gumbel_softmax(logits, tau=1, hard=False, eps=1e-10, dim=-1):
  1269     Samples from the `Gumbel-Softmax distribution`_ and optionally discretizes.  1272       logits: `[..., num_features]` unnormalized log probabilities  1273       tau: non-negative scalar temperature  1274       hard: if ``True``, the returned samples will be discretized as one-hot vectors,  1275             but will be differentiated as if it is the soft sample in autograd  1276       dim (int): A dimension along which softmax will be computed. Default: -1.  1279       Sampled tensor of same shape as `logits` from the Gumbel-Softmax distribution.  1280       If ``hard=True``, the returned samples will be one-hot, otherwise they will  1281       be probability distributions that sum to 1 across `dim`.  1284       This function is here for legacy reasons, may be removed from nn.Functional in the future.  1287       The main trick for `hard` is to do  `y_hard - y_soft.detach() + y_soft`  1289       It achieves two things:  1290       - makes the output value exactly one-hot  1291       (since we add then subtract y_soft value)  1292       - makes the gradient equal to y_soft gradient  1293       (since we strip all other gradients)  1296         >>> logits = torch.randn(20, 32)  1297         >>> # Sample soft categorical using reparametrization trick:  1298         >>> F.gumbel_softmax(logits, tau=1, hard=False)  1299         >>> # Sample hard categorical using "Straight-through" trick:  1300         >>> F.gumbel_softmax(logits, tau=1, hard=True)  1302     .. _Gumbel-Softmax distribution:  1303         https://arxiv.org/abs/1611.00712  1304         https://arxiv.org/abs/1611.01144  1308         warnings.warn(
"`eps` parameter is deprecated and has no effect.")
  1310     gumbels = -torch.empty_like(logits).exponential_().log()  
  1311     gumbels = (logits + gumbels) / tau  
  1312     y_soft = gumbels.softmax(dim)
  1316         index = y_soft.max(dim, keepdim=
True)[1]
  1317         y_hard = torch.zeros_like(logits).scatter_(dim, index, 1.0)
  1318         ret = y_hard - y_soft.detach() + y_soft
  1326 def log_softmax(input, dim=None, _stacklevel=3, dtype=None):
  1328     r"""Applies a softmax followed by a logarithm.  1330     While mathematically equivalent to log(softmax(x)), doing these two  1331     operations separately is slower, and numerically unstable. This function  1332     uses an alternative formulation to compute the output and gradient correctly.  1334     See :class:`~torch.nn.LogSoftmax` for more details.  1337         input (Tensor): input  1338         dim (int): A dimension along which log_softmax will be computed.  1339         dtype (:class:`torch.dtype`, optional): the desired data type of returned tensor.  1340           If specified, the input tensor is casted to :attr:`dtype` before the operation  1341           is performed. This is useful for preventing data type overflows. Default: None.  1344         dim = _get_softmax_dim(
'log_softmax', input.dim(), _stacklevel)
  1346         ret = input.log_softmax(dim)
  1348         ret = input.log_softmax(dim, dtype=dtype)
  1352 softshrink = _add_docstr(torch._C._nn.softshrink, 
r"""  1353 softshrink(input, lambd=0.5) -> Tensor  1355 Applies the soft shrinkage function elementwise  1357 See :class:`~torch.nn.Softshrink` for more details.  1363     r"""tanh(input) -> Tensor  1365     Applies element-wise,  1366     :math:`\text{Tanh}(x) = \tanh(x) = \frac{\exp(x) - \exp(-x)}{\exp(x) + \exp(-x)}`  1368     See :class:`~torch.nn.Tanh` for more details.  1370     warnings.warn(
"nn.functional.tanh is deprecated. Use torch.tanh instead.")
  1376     r"""sigmoid(input) -> Tensor  1378     Applies the element-wise function :math:`\text{Sigmoid}(x) = \frac{1}{1 + \exp(-x)}`  1380     See :class:`~torch.nn.Sigmoid` for more details.  1382     warnings.warn(
"nn.functional.sigmoid is deprecated. Use torch.sigmoid instead.")
  1383     return input.sigmoid()
  1387 def linear(input, weight, bias=None):
  1390     Applies a linear transformation to the incoming data: :math:`y = xA^T + b`.  1394         - Input: :math:`(N, *, in\_features)` where `*` means any number of  1395           additional dimensions  1396         - Weight: :math:`(out\_features, in\_features)`  1397         - Bias: :math:`(out\_features)`  1398         - Output: :math:`(N, *, out\_features)`  1400     if input.dim() == 2 
and bias 
is not None:
  1402         ret = torch.addmm(bias, input, weight.t())
  1404         output = input.matmul(weight.t())
  1405         if bias 
is not None:
  1412 def bilinear(input1, input2, weight, bias=None):
  1414     return torch.bilinear(input1, input2, weight, bias)
  1417 def _no_grad_embedding_renorm_(weight, input, max_norm, norm_type):
  1419     with torch.no_grad():
  1420         return torch.embedding_renorm_(weight, input, max_norm, norm_type)
  1424 def embedding(input, weight, padding_idx=None, max_norm=None, norm_type=2.,
  1425               scale_grad_by_freq=
False, sparse=
False):
  1427     r"""A simple lookup table that looks up embeddings in a fixed dictionary and size.  1429     This module is often used to retrieve word embeddings using indices.  1430     The input to the module is a list of indices, and the embedding matrix,  1431     and the output is the corresponding word embeddings.  1433     See :class:`torch.nn.Embedding` for more details.  1436         input (LongTensor): Tensor containing indices into the embedding matrix  1437         weight (Tensor): The embedding matrix with number of rows equal to the maximum possible index + 1,  1438             and number of columns equal to the embedding size  1439         padding_idx (int, optional): If given, pads the output with the embedding vector at :attr:`padding_idx`  1440                                          (initialized to zeros) whenever it encounters the index.  1441         max_norm (float, optional): If given, each embedding vector with norm larger than :attr:`max_norm`  1442                                     is renormalized to have norm :attr:`max_norm`.  1443                                     Note: this will modify :attr:`weight` in-place.  1444         norm_type (float, optional): The p of the p-norm to compute for the :attr:`max_norm` option. Default ``2``.  1445         scale_grad_by_freq (boolean, optional): If given, this will scale gradients by the inverse of frequency of  1446                                                 the words in the mini-batch. Default ``False``.  1447         sparse (bool, optional): If ``True``, gradient w.r.t. :attr:`weight` will be a sparse tensor. See Notes under  1448                                  :class:`torch.nn.Embedding` for more details regarding sparse gradients.  
1451         - Input: LongTensor of arbitrary shape containing the indices to extract  1452         - Weight: Embedding matrix of floating point type with shape `(V, embedding_dim)`,  1453                             where V = maximum index + 1 and embedding_dim = the embedding size  1454         - Output: `(*, embedding_dim)`, where `*` is the input shape  1458         >>> # a batch of 2 samples of 4 indices each  1459         >>> input = torch.tensor([[1,2,4,5],[4,3,2,9]])  1460         >>> # an embedding matrix containing 10 tensors of size 3  1461         >>> embedding_matrix = torch.rand(10, 3)  1462         >>> F.embedding(input, embedding_matrix)  1463         tensor([[[ 0.8490,  0.9625,  0.6753],  1464                  [ 0.9666,  0.7761,  0.6108],  1465                  [ 0.6246,  0.9751,  0.3618],  1466                  [ 0.4161,  0.2419,  0.7383]],  1468                 [[ 0.6246,  0.9751,  0.3618],  1469                  [ 0.0237,  0.7794,  0.0528],  1470                  [ 0.9666,  0.7761,  0.6108],  1471                  [ 0.3385,  0.8612,  0.1867]]])  1473         >>> # example with padding_idx  1474         >>> weights = torch.rand(10, 3)  1475         >>> weights[0, :].zero_()  1476         >>> embedding_matrix = weights  1477         >>> input = torch.tensor([[0,2,0,5]])  1478         >>> F.embedding(input, embedding_matrix, padding_idx=0)  1479         tensor([[[ 0.0000,  0.0000,  0.0000],  1480                  [ 0.5609,  0.5384,  0.8720],  1481                  [ 0.0000,  0.0000,  0.0000],  1482                  [ 0.6262,  0.2438,  0.7471]]])  1484     if padding_idx 
is not None:
  1486             assert padding_idx < weight.size(0), 
'Padding_idx must be within num_embeddings'  1487         elif padding_idx < 0:
  1488             assert padding_idx >= -weight.size(0), 
'Padding_idx must be within num_embeddings'  1489             padding_idx = weight.size(0) + padding_idx
  1492     if max_norm 
is not None:
  1496         input = input.contiguous()
  1501         _no_grad_embedding_renorm_(weight, input, max_norm, norm_type)
  1502     return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
  1506 def embedding_bag(input, weight, offsets=None, max_norm=None, norm_type=2,
  1507                   scale_grad_by_freq=
False, mode=
'mean', sparse=
False):
  1509     r"""Computes sums, means or maxes of `bags` of embeddings, without instantiating the  1510     intermediate embeddings.  1512     See :class:`torch.nn.EmbeddingBag` for more details.  1514     .. include:: cuda_deterministic_backward.rst  1517         input (LongTensor): Tensor containing bags of indices into the embedding matrix  1518         weight (Tensor): The embedding matrix with number of rows equal to the maximum possible index + 1,  1519             and number of columns equal to the embedding size  1520         offsets (LongTensor, optional): Only used when :attr:`input` is 1D. :attr:`offsets` determines  1521                              the starting index position of each bag (sequence) in :attr:`input`.  1522         max_norm (float, optional): If given, each embedding vector with norm larger than :attr:`max_norm`  1523                                     is renormalized to have norm :attr:`max_norm`.  1524                                     Note: this will modify :attr:`weight` in-place.  1525         norm_type (float, optional): The ``p`` in the ``p``-norm to compute for the :attr:`max_norm` option.  1527         scale_grad_by_freq (boolean, optional): if given, this will scale gradients by the inverse of frequency of  1528                                                 the words in the mini-batch. Default ``False``.  1529                                                 Note: this option is not supported when ``mode="max"``.  1530         mode (string, optional): ``"sum"``, ``"mean"`` or ``"max"``. Specifies the way to reduce the bag.  1532         sparse (bool, optional): if ``True``, gradient w.r.t. :attr:`weight` will be a sparse tensor. See Notes under  1533                                  :class:`torch.nn.Embedding` for more details regarding sparse gradients.  1534                                  Note: this option is not supported when ``mode="max"``.  
1538         - :attr:`input` (LongTensor) and :attr:`offsets` (LongTensor, optional)  1540           - If :attr:`input` is 2D of shape `(B, N)`,  1542             it will be treated as ``B`` bags (sequences) each of fixed length ``N``, and  1543             this will return ``B`` values aggregated in a way depending on the :attr:`mode`.  1544             :attr:`offsets` is ignored and required to be ``None`` in this case.  1546           - If :attr:`input` is 1D of shape `(N)`,  1548             it will be treated as a concatenation of multiple bags (sequences).  1549             :attr:`offsets` is required to be a 1D tensor containing the  1550             starting index positions of each bag in :attr:`input`. Therefore,  1551             for :attr:`offsets` of shape `(B)`, :attr:`input` will be viewed as  1552             having ``B`` bags. Empty bags (i.e., having 0-length) will have  1553             returned vectors filled by zeros.  1555         - :attr:`weight` (Tensor): the learnable weights of the module of  1556           shape `(num_embeddings, embedding_dim)`  1558         - :attr:`output`: aggregated embedding values of shape `(B, embedding_dim)`  1562         >>> # an Embedding module containing 10 tensors of size 3  1563         >>> embedding_matrix = torch.rand(10, 3)  1564         >>> # a batch of 2 samples of 4 indices each  1565         >>> input = torch.tensor([1,2,4,5,4,3,2,9])  1566         >>> offsets = torch.tensor([0,4])  1567         >>> F.embedding_bag(embedding_matrix, input, offsets)  1568         tensor([[ 0.3397,  0.3552,  0.5545],  1569                 [ 0.5893,  0.4386,  0.5882]])  1574     if weight.dtype == torch.long 
and input.is_floating_point():
  1575         warnings.warn(
"Argument order of nn.functional.embedding_bag was changed. "  1576                       "Usage `embedding_bag(weight, input, ...)` is deprecated, "  1577                       "and should now be `embedding_bag(input, weight, ...)`.")
  1578         weight, input = input, weight
  1580     if input.dim() == 2:
  1581         if offsets 
is not None:
  1582             raise ValueError(
"if input is 2D, then offsets has to be None"  1583                              ", as input is treated is a mini-batch of"  1584                              " fixed length sequences. However, found "  1585                              "offsets of type {}".format(type(offsets)))
  1587             offsets = torch.arange(0, input.numel(), input.size(1),
  1588                                    dtype=torch.long, device=input.device)
  1590             input = input.reshape(-1)
  1591     elif input.dim() == 1:
  1593             raise ValueError(
"offsets has to be a 1D Tensor but got None")
  1595         if offsets.dim() != 1:
  1596             raise ValueError(
"offsets has to be a 1D Tensor")
  1597         if int(offsets[0]) != 0:
  1598             raise ValueError(
"offsets[0] has to be 0, i.e., the first sequence "  1599                              "in the mini-batch has to start from position 0. "  1600                              "However, got {}".format(offsets[0].item()))
  1601         if int(offsets[-1]) > input.size(0):
  1602             raise ValueError(
"offsets[-1] can not be greater than input's length"  1603                              " ({}), but got offsets[-1] of {}"  1604                              .format(input.size(0), offsets[-1].item()))
  1606         raise ValueError(
"input has to be 1D or 2D Tensor,"  1607                          " but got Tensor of dimension {}".format(input.dim()))
  1611     elif mode == 
'mean':
  1616         if scale_grad_by_freq:
  1617             raise ValueError(
"max mode does not support scaling the gradient by the frequency")
  1620             raise ValueError(
"max mode does not support sparse weights")
  1624         raise ValueError(
"mode has to be one of sum, mean or max")
  1626     if max_norm 
is not None:
  1631         _no_grad_embedding_renorm_(weight, input, max_norm, norm_type)
  1633     ret, _, _, _ = torch.embedding_bag(
  1644 def batch_norm(input, running_mean, running_var, weight=None, bias=None,
  1645                training=
False, momentum=0.1, eps=1e-5):
  1647     r"""Applies Batch Normalization for each channel across a batch of data.  1649     See :class:`~torch.nn.BatchNorm1d`, :class:`~torch.nn.BatchNorm2d`,  1650     :class:`~torch.nn.BatchNorm3d` for details.  1663         size_prods = size[0]
  1664         for i 
in range(len(size) - 2):
  1665             size_prods *= size[i + 2]
  1667             raise ValueError(
'Expected more than 1 value per channel when training, got input size {}'.format(size))
  1669     return torch.batch_norm(
  1670         input, weight, bias, running_mean, running_var,
  1671         training, momentum, eps, torch.backends.cudnn.enabled
  1676 def instance_norm(input, running_mean=None, running_var=None, weight=None,
  1677                   bias=
None, use_input_stats=
True, momentum=0.1, eps=1e-5):
  1679     r"""Applies Instance Normalization for each channel in each data sample in a  1682     See :class:`~torch.nn.InstanceNorm1d`, :class:`~torch.nn.InstanceNorm2d`,  1683     :class:`~torch.nn.InstanceNorm3d` for details.  1685     return torch.instance_norm(
  1686         input, weight, bias, running_mean, running_var,
  1687         use_input_stats, momentum, eps, torch.backends.cudnn.enabled
  1692 def layer_norm(input, normalized_shape, weight=None, bias=None, eps=1e-5):
  1694     r"""Applies Layer Normalization for last certain number of dimensions.  1696     See :class:`~torch.nn.LayerNorm` for details.  1698     return torch.layer_norm(input, normalized_shape, weight, bias, eps,
  1699                             torch.backends.cudnn.enabled)
  1703 def group_norm(input, num_groups, weight=None, bias=None, eps=1e-5):
  1705     r"""Applies Group Normalization for last certain number of dimensions.  1707     See :class:`~torch.nn.GroupNorm` for details.  1709     return torch.group_norm(input, num_groups, weight, bias, eps,
  1710                             torch.backends.cudnn.enabled)
  1714 def local_response_norm(input, size, alpha=1e-4, beta=0.75, k=1.):
  1716     r"""Applies local response normalization over an input signal composed of  1717     several input planes, where channels occupy the second dimension.  1718     Applies normalization across channels.  1720     See :class:`~torch.nn.LocalResponseNorm` for details.  1724         raise ValueError(
'Expected 3D or higher dimensionality \  1725                          input (got {} dimensions)'.format(dim))
  1726     div = input.mul(input).unsqueeze(1)
  1728         div = pad(div, (0, 0, size // 2, (size - 1) // 2))
  1729         div = avg_pool2d(div, (size, 1), stride=1).squeeze(1)
  1731         sizes = input.size()
  1732         div = div.view(sizes[0], 1, sizes[1], sizes[2], -1)
  1733         div = pad(div, (0, 0, 0, 0, size // 2, (size - 1) // 2))
  1734         div = avg_pool3d(div, (size, 1, 1), stride=1).squeeze(1)
  1735         div = div.view(sizes)
  1736     div = div.mul(alpha).add(k).pow(beta)
  1743 def ctc_loss(log_probs, targets, input_lengths, target_lengths, blank=0,
  1744              reduction=
'mean', zero_infinity=
False):
  1746     r"""The Connectionist Temporal Classification loss.  1748     See :class:`~torch.nn.CTCLoss` for details.  1750     .. include:: cudnn_deterministic.rst  1751     .. include:: cuda_deterministic_backward.rst  1754         log_probs: :math:`(T, N, C)` where `C = number of characters in alphabet including blank`,  1755             `T = input length`, and `N = batch size`.  1756             The logarithmized probabilities of the outputs  1757             (e.g. obtained with :func:`torch.nn.functional.log_softmax`).  1758         targets: :math:`(N, S)` or `(sum(target_lengths))`.  1759             Targets cannot be blank. In the second form, the targets are assumed to be concatenated.  1760         input_lengths: :math:`(N)`.  1761             Lengths of the inputs (must each be :math:`\leq T`)  1762         target_lengths: :math:`(N)`.  1763             Lengths of the targets  1764         blank (int, optional):  1765             Blank label. Default :math:`0`.  1766         reduction (string, optional): Specifies the reduction to apply to the output:  1767             ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied,  1768             ``'mean'``: the output losses will be divided by the target lengths and  1769             then the mean over the batch is taken, ``'sum'``: the output will be  1770             summed. Default: ``'mean'``  1771         zero_infinity (bool, optional):  1772             Whether to zero infinite losses and the associated gradients.  1774             Infinite losses mainly occur when the inputs are too short  1775             to be aligned to the targets.  
1779         >>> log_probs = torch.randn(50, 16, 20).log_softmax(2).detach().requires_grad_()  1780         >>> targets = torch.randint(1, 20, (16, 30), dtype=torch.long)  1781         >>> input_lengths = torch.full((16,), 50, dtype=torch.long)  1782         >>> target_lengths = torch.randint(10,30,(16,), dtype=torch.long)  1783         >>> loss = F.ctc_loss(log_probs, targets, input_lengths, target_lengths)  1786     return torch.ctc_loss(log_probs, targets, input_lengths, target_lengths, blank, _Reduction.get_enum(reduction),
  1791 def nll_loss(input, target, weight=None, size_average=None, ignore_index=-100,
  1792              reduce=
None, reduction=
'mean'):
  1794     r"""The negative log likelihood loss.  1796     See :class:`~torch.nn.NLLLoss` for details.  1799         input: :math:`(N, C)` where `C = number of classes` or :math:`(N, C, H, W)`  1800             in case of 2D Loss, or :math:`(N, C, d_1, d_2, ..., d_K)` where :math:`K \geq 1`  1801             in the case of K-dimensional loss.  1802         target: :math:`(N)` where each value is :math:`0 \leq \text{targets}[i] \leq C-1`,  1803             or :math:`(N, d_1, d_2, ..., d_K)` where :math:`K \geq 1` for  1805         weight (Tensor, optional): a manual rescaling weight given to each  1806             class. If given, has to be a Tensor of size `C`  1807         size_average (bool, optional): Deprecated (see :attr:`reduction`). By default,  1808             the losses are averaged over each loss element in the batch. Note that for  1809             some losses, there multiple elements per sample. If the field :attr:`size_average`  1810             is set to ``False``, the losses are instead summed for each minibatch. Ignored  1811             when reduce is ``False``. Default: ``True``  1812         ignore_index (int, optional): Specifies a target value that is ignored  1813             and does not contribute to the input gradient. When :attr:`size_average` is  1814             ``True``, the loss is averaged over non-ignored targets. Default: -100  1815         reduce (bool, optional): Deprecated (see :attr:`reduction`). By default, the  1816             losses are averaged or summed over observations for each minibatch depending  1817             on :attr:`size_average`. When :attr:`reduce` is ``False``, returns a loss per  1818             batch element instead and ignores :attr:`size_average`. Default: ``True``  1819         reduction (string, optional): Specifies the reduction to apply to the output:  1820             ``'none'`` | ``'mean'`` | ``'sum'``. 
``'none'``: no reduction will be applied,  1821             ``'mean'``: the sum of the output will be divided by the number of  1822             elements in the output, ``'sum'``: the output will be summed. Note: :attr:`size_average`  1823             and :attr:`reduce` are in the process of being deprecated, and in the meantime,  1824             specifying either of those two args will override :attr:`reduction`. Default: ``'mean'``  1828         >>> # input is of size N x C = 3 x 5  1829         >>> input = torch.randn(3, 5, requires_grad=True)  1830         >>> # each element in target has to have 0 <= value < C  1831         >>> target = torch.tensor([1, 0, 4])  1832         >>> output = F.nll_loss(F.log_softmax(input), target)  1833         >>> output.backward()  1835     if size_average 
is not None or reduce 
is not None:
  1836         reduction = _Reduction.legacy_get_string(size_average, reduce)
  1839         raise ValueError(
'Expected 2 or more dimensions (got {})'.format(dim))
  1841     if input.size(0) != target.size(0):
  1842         raise ValueError(
'Expected input batch_size ({}) to match target batch_size ({}).'  1843                          .format(input.size(0), target.size(0)))
  1845         ret = torch._C._nn.nll_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
  1847         ret = torch._C._nn.nll_loss2d(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
  1852         out_size = (n,) + input.size()[2:]
  1853         if target.size()[1:] != input.size()[2:]:
  1854             raise ValueError(
'Expected target size {}, got {}'.format(
  1855                 out_size, target.size()))
  1856         input = input.contiguous().view(n, c, 1, -1)
  1857         target = target.contiguous().view(n, 1, -1)
  1858         reduction_enum = _Reduction.get_enum(reduction)
  1859         if reduction != 
'none':
  1860             ret = torch._C._nn.nll_loss2d(
  1861                 input, target, weight, reduction_enum, ignore_index)
  1863             out = torch._C._nn.nll_loss2d(
  1864                 input, target, weight, reduction_enum, ignore_index)
  1865             ret = out.view(out_size)
  1870 def poisson_nll_loss(input, target, log_input=True, full=False, size_average=None, eps=1e-8,
  1871                      reduce=
None, reduction=
'mean'):
  1873     r"""Poisson negative log likelihood loss.  1875     See :class:`~torch.nn.PoissonNLLLoss` for details.  1878         input: expectation of underlying Poisson distribution.  1879         target: random sample :math:`target \sim \text{Poisson}(input)`.  1880         log_input: if ``True`` the loss is computed as  1881             :math:`\exp(\text{input}) - \text{target} * \text{input}`, if ``False`` then loss is  1882             :math:`\text{input} - \text{target} * \log(\text{input}+\text{eps})`. Default: ``True``  1883         full: whether to compute full loss, i. e. to add the Stirling  1884             approximation term. Default: ``False``  1885             :math:`\text{target} * \log(\text{target}) - \text{target} + 0.5 * \log(2 * \pi * \text{target})`.  1886         size_average (bool, optional): Deprecated (see :attr:`reduction`). By default,  1887             the losses are averaged over each loss element in the batch. Note that for  1888             some losses, there multiple elements per sample. If the field :attr:`size_average`  1889             is set to ``False``, the losses are instead summed for each minibatch. Ignored  1890             when reduce is ``False``. Default: ``True``  1891         eps (float, optional): Small value to avoid evaluation of :math:`\log(0)` when  1892             :attr:`log_input`=``False``. Default: 1e-8  1893         reduce (bool, optional): Deprecated (see :attr:`reduction`). By default, the  1894             losses are averaged or summed over observations for each minibatch depending  1895             on :attr:`size_average`. When :attr:`reduce` is ``False``, returns a loss per  1896             batch element instead and ignores :attr:`size_average`. Default: ``True``  1897         reduction (string, optional): Specifies the reduction to apply to the output:  1898             ``'none'`` | ``'mean'`` | ``'sum'``. 
``'none'``: no reduction will be applied,  1899             ``'mean'``: the sum of the output will be divided by the number of  1900             elements in the output, ``'sum'``: the output will be summed. Note: :attr:`size_average`  1901             and :attr:`reduce` are in the process of being deprecated, and in the meantime,  1902             specifying either of those two args will override :attr:`reduction`. Default: ``'mean'``  1905     if size_average 
is not None or reduce 
is not None:
  1906         reduction = _Reduction.legacy_get_string(size_average, reduce)
  1908         loss = torch.exp(input) - target * input
  1910         loss = input - target * torch.log(input + eps)
  1913         loss[mask] += (target * torch.log(target) - target + 0.5 * torch.log(2 * math.pi * target))[mask]
  1914     if reduction == 
'none':
  1916     elif reduction == 
'mean':
  1917         ret = torch.mean(loss)
  1918     elif reduction == 
'sum':
  1919         ret = torch.sum(loss)
  1922         raise ValueError(reduction + 
" is not valid")
  1927 def kl_div(input, target, size_average=None, reduce=None, reduction='mean'):
  1929     r"""The `Kullback-Leibler divergence`_ Loss.  1931     See :class:`~torch.nn.KLDivLoss` for details.  1934         input: Tensor of arbitrary shape  1935         target: Tensor of the same shape as input  1936         size_average (bool, optional): Deprecated (see :attr:`reduction`). By default,  1937             the losses are averaged over each loss element in the batch. Note that for  1938             some losses, there multiple elements per sample. If the field :attr:`size_average`  1939             is set to ``False``, the losses are instead summed for each minibatch. Ignored  1940             when reduce is ``False``. Default: ``True``  1941         reduce (bool, optional): Deprecated (see :attr:`reduction`). By default, the  1942             losses are averaged or summed over observations for each minibatch depending  1943             on :attr:`size_average`. When :attr:`reduce` is ``False``, returns a loss per  1944             batch element instead and ignores :attr:`size_average`. Default: ``True``  1945         reduction (string, optional): Specifies the reduction to apply to the output:  1946             ``'none'`` | ``'batchmean'`` | ``'sum'`` | ``'mean'``.  1947             ``'none'``: no reduction will be applied  1948             ``'batchmean'``: the sum of the output will be divided by the batchsize  1949             ``'sum'``: the output will be summed  1950             ``'mean'``: the output will be divided by the number of elements in the output  1954         :attr:`size_average` and :attr:`reduce` are in the process of being deprecated,  1955         and in the meantime, specifying either of those two args will override :attr:`reduction`.  1958         :attr:``reduction`` = ``'mean'`` doesn't return the true kl divergence value, please use  1959         :attr:``reduction`` = ``'batchmean'`` which aligns with KL math definition.  1960         In the next major release, ``'mean'`` will be changed to be the same as 'batchmean'.  
1962     if size_average 
is not None or reduce 
is not None:
  1963         reduction_enum = _Reduction.legacy_get_enum(size_average, reduce)
  1965         if reduction == 
'mean':
  1966             warnings.warn(
"reduction: 'mean' divides the total loss by both the batch size and the support size."  1967                           "'batchmean' divides only by the batch size, and aligns with the KL div math definition."  1968                           "'mean' will be changed to behave the same as 'batchmean' in the next major release.")
  1971         if reduction == 
'batchmean':
  1972             reduction_enum = _Reduction.get_enum(
'sum')
  1974             reduction_enum = _Reduction.get_enum(reduction)
  1976     reduced = torch.kl_div(input, target, reduction_enum)
  1978     if reduction == 
'batchmean' and input.dim() != 0:
  1979         reduced = reduced / input.size()[0]
  1985 def cross_entropy(input, target, weight=None, size_average=None, ignore_index=-100,
  1986                   reduce=
None, reduction=
'mean'):
  1988     r"""This criterion combines `log_softmax` and `nll_loss` in a single  1991     See :class:`~torch.nn.CrossEntropyLoss` for details.  1994         input (Tensor) : :math:`(N, C)` where `C = number of classes` or :math:`(N, C, H, W)`  1995             in case of 2D Loss, or :math:`(N, C, d_1, d_2, ..., d_K)` where :math:`K \geq 1`  1996             in the case of K-dimensional loss.  1997         target (Tensor) : :math:`(N)` where each value is :math:`0 \leq \text{targets}[i] \leq C-1`,  1998             or :math:`(N, d_1, d_2, ..., d_K)` where :math:`K \geq 1` for  2000         weight (Tensor, optional): a manual rescaling weight given to each  2001             class. If given, has to be a Tensor of size `C`  2002         size_average (bool, optional): Deprecated (see :attr:`reduction`). By default,  2003             the losses are averaged over each loss element in the batch. Note that for  2004             some losses, there multiple elements per sample. If the field :attr:`size_average`  2005             is set to ``False``, the losses are instead summed for each minibatch. Ignored  2006             when reduce is ``False``. Default: ``True``  2007         ignore_index (int, optional): Specifies a target value that is ignored  2008             and does not contribute to the input gradient. When :attr:`size_average` is  2009             ``True``, the loss is averaged over non-ignored targets. Default: -100  2010         reduce (bool, optional): Deprecated (see :attr:`reduction`). By default, the  2011             losses are averaged or summed over observations for each minibatch depending  2012             on :attr:`size_average`. When :attr:`reduce` is ``False``, returns a loss per  2013             batch element instead and ignores :attr:`size_average`. Default: ``True``  2014         reduction (string, optional): Specifies the reduction to apply to the output:  2015             ``'none'`` | ``'mean'`` | ``'sum'``. 
``'none'``: no reduction will be applied,  2016             ``'mean'``: the sum of the output will be divided by the number of  2017             elements in the output, ``'sum'``: the output will be summed. Note: :attr:`size_average`  2018             and :attr:`reduce` are in the process of being deprecated, and in the meantime,  2019             specifying either of those two args will override :attr:`reduction`. Default: ``'mean'``  2023         >>> input = torch.randn(3, 5, requires_grad=True)  2024         >>> target = torch.randint(5, (3,), dtype=torch.int64)  2025         >>> loss = F.cross_entropy(input, target)  2028     if size_average 
is not None or reduce 
is not None:
  2029         reduction = _Reduction.legacy_get_string(size_average, reduce)
  2030     return nll_loss(log_softmax(input, 1), target, weight, 
None, ignore_index, 
None, reduction)
  2034 def binary_cross_entropy(input, target, weight=None, size_average=None,
  2035                          reduce=
None, reduction=
'mean'):
  2037     r"""Function that measures the Binary Cross Entropy  2038     between the target and the output.  2040     See :class:`~torch.nn.BCELoss` for details.  2043         input: Tensor of arbitrary shape  2044         target: Tensor of the same shape as input  2045         weight (Tensor, optional): a manual rescaling weight  2046                 if provided it's repeated to match input tensor shape  2047         size_average (bool, optional): Deprecated (see :attr:`reduction`). By default,  2048             the losses are averaged over each loss element in the batch. Note that for  2049             some losses, there multiple elements per sample. If the field :attr:`size_average`  2050             is set to ``False``, the losses are instead summed for each minibatch. Ignored  2051             when reduce is ``False``. Default: ``True``  2052         reduce (bool, optional): Deprecated (see :attr:`reduction`). By default, the  2053             losses are averaged or summed over observations for each minibatch depending  2054             on :attr:`size_average`. When :attr:`reduce` is ``False``, returns a loss per  2055             batch element instead and ignores :attr:`size_average`. Default: ``True``  2056         reduction (string, optional): Specifies the reduction to apply to the output:  2057             ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied,  2058             ``'mean'``: the sum of the output will be divided by the number of  2059             elements in the output, ``'sum'``: the output will be summed. Note: :attr:`size_average`  2060             and :attr:`reduce` are in the process of being deprecated, and in the meantime,  2061             specifying either of those two args will override :attr:`reduction`. 
Default: ``'mean'``  2065         >>> input = torch.randn((3, 2), requires_grad=True)  2066         >>> target = torch.rand((3, 2), requires_grad=False)  2067         >>> loss = F.binary_cross_entropy(F.sigmoid(input), target)  2070     if size_average 
is not None or reduce 
is not None:
  2071         reduction_enum = _Reduction.legacy_get_enum(size_average, reduce)
  2073         reduction_enum = _Reduction.get_enum(reduction)
  2074     if not (target.size() == input.size()):
  2075         warnings.warn(
"Using a target size ({}) that is different to the input size ({}) is deprecated. "  2076                       "Please ensure they have the same size.".format(target.size(), input.size()))
  2077     if input.numel() != target.numel():
  2078         raise ValueError(
"Target and input must have the same number of elements. target nelement ({}) "  2079                          "!= input nelement ({})".format(target.numel(), input.numel()))
  2081     if weight 
is not None:
  2082         new_size = _infer_size(target.size(), weight.size())
  2083         weight = weight.expand(new_size)
  2085     return torch._C._nn.binary_cross_entropy(
  2086         input, target, weight, reduction_enum)
  2090 def binary_cross_entropy_with_logits(input, target, weight=None, size_average=None,
  2091                                      reduce=
None, reduction=
'mean', pos_weight=
None):
  2093     r"""Function that measures Binary Cross Entropy between target and output  2096     See :class:`~torch.nn.BCEWithLogitsLoss` for details.  2099         input: Tensor of arbitrary shape  2100         target: Tensor of the same shape as input  2101         weight (Tensor, optional): a manual rescaling weight  2102             if provided it's repeated to match input tensor shape  2103         size_average (bool, optional): Deprecated (see :attr:`reduction`). By default,  2104             the losses are averaged over each loss element in the batch. Note that for  2105             some losses, there multiple elements per sample. If the field :attr:`size_average`  2106             is set to ``False``, the losses are instead summed for each minibatch. Ignored  2107             when reduce is ``False``. Default: ``True``  2108         reduce (bool, optional): Deprecated (see :attr:`reduction`). By default, the  2109             losses are averaged or summed over observations for each minibatch depending  2110             on :attr:`size_average`. When :attr:`reduce` is ``False``, returns a loss per  2111             batch element instead and ignores :attr:`size_average`. Default: ``True``  2112         reduction (string, optional): Specifies the reduction to apply to the output:  2113             ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied,  2114             ``'mean'``: the sum of the output will be divided by the number of  2115             elements in the output, ``'sum'``: the output will be summed. Note: :attr:`size_average`  2116             and :attr:`reduce` are in the process of being deprecated, and in the meantime,  2117             specifying either of those two args will override :attr:`reduction`. Default: ``'mean'``  2118         pos_weight (Tensor, optional): a weight of positive examples.  2119                 Must be a vector with length equal to the number of classes.  
2123          >>> input = torch.randn(3, requires_grad=True)  2124          >>> target = torch.empty(3).random_(2)  2125          >>> loss = F.binary_cross_entropy_with_logits(input, target)  2128     if size_average 
is not None or reduce 
is not None:
  2129         reduction_enum = _Reduction.legacy_get_enum(size_average, reduce)
  2131         reduction_enum = _Reduction.get_enum(reduction)
  2133     if not (target.size() == input.size()):
  2134         raise ValueError(
"Target size ({}) must be the same as input size ({})".format(target.size(), input.size()))
  2136     return torch.binary_cross_entropy_with_logits(input, target, weight, pos_weight, reduction_enum)
  2139 def _pointwise_loss(lambd, lambd_optimized, input, target, reduction='mean'):
  2140     if target.requires_grad:
  2141         d = lambd(input, target)
  2142         if reduction == 
'none':
  2144         return torch.mean(d) 
if reduction == 
'mean' else torch.sum(d)
  2146         expanded_input, expanded_target = torch.broadcast_tensors(input, target)
  2147         return lambd_optimized(expanded_input, expanded_target, _Reduction.get_enum(reduction))
  2151 def _smooth_l1_loss(input, target):
  2153     t = torch.abs(input - target)
  2154     return torch.where(t < 1, 0.5 * t ** 2, t - 0.5)
  2158 def smooth_l1_loss(input, target, size_average=None, reduce=None, reduction='mean'):
  2160     r"""Function that uses a squared term if the absolute  2161     element-wise error falls below 1 and an L1 term otherwise.  2163     See :class:`~torch.nn.SmoothL1Loss` for details.  2165     if size_average 
is not None or reduce 
is not None:
  2166         reduction = _Reduction.legacy_get_string(size_average, reduce)
  2167     if target.requires_grad:
  2168         ret = _smooth_l1_loss(input, target)
  2169         if reduction != 
'none':
  2170             ret = torch.mean(ret) 
if reduction == 
'mean' else torch.sum(ret)
  2172         expanded_input, expanded_target = torch.broadcast_tensors(input, target)
  2173         ret = torch._C._nn.smooth_l1_loss(expanded_input, expanded_target, _Reduction.get_enum(reduction))
  2178 def l1_loss(input, target, size_average=None, reduce=None, reduction='mean'):
  2180     r"""l1_loss(input, target, size_average=None, reduce=None, reduction='mean') -> Tensor  2182     Function that takes the mean element-wise absolute value difference.  2184     See :class:`~torch.nn.L1Loss` for details.  2186     if size_average 
is not None or reduce 
is not None:
  2187         reduction = _Reduction.legacy_get_string(size_average, reduce)
  2188     if target.requires_grad:
  2189         ret = torch.abs(input - target)
  2190         if reduction != 
'none':
  2191             ret = torch.mean(ret) 
if reduction == 
'mean' else torch.sum(ret)
  2193         expanded_input, expanded_target = torch.broadcast_tensors(input, target)
  2194         ret = torch._C._nn.l1_loss(expanded_input, expanded_target, _Reduction.get_enum(reduction))
  2199 def mse_loss(input, target, size_average=None, reduce=None, reduction='mean'):
  2201     r"""mse_loss(input, target, size_average=None, reduce=None, reduction='mean') -> Tensor  2203     Measures the element-wise mean squared error.  2205     See :class:`~torch.nn.MSELoss` for details.  2207     if size_average 
is not None or reduce 
is not None:
  2208         reduction = _Reduction.legacy_get_string(size_average, reduce)
  2209     if target.requires_grad:
  2210         ret = (input - target) ** 2
  2211         if reduction != 
'none':
  2212             ret = torch.mean(ret) 
if reduction == 
'mean' else torch.sum(ret)
  2214         expanded_input, expanded_target = torch.broadcast_tensors(input, target)
  2215         ret = torch._C._nn.mse_loss(expanded_input, expanded_target, _Reduction.get_enum(reduction))
  2220 def margin_ranking_loss(input1, input2, target, margin=0, size_average=None,
  2221                         reduce=
None, reduction=
'mean'):
  2223     r"""margin_ranking_loss(input1, input2, target, margin=0, size_average=None, reduce=None, reduction='mean') -> Tensor  2225     See :class:`~torch.nn.MarginRankingLoss` for details.  2227     if size_average 
is not None or reduce 
is not None:
  2228         reduction_enum = _Reduction.legacy_get_enum(size_average, reduce)
  2230         reduction_enum = _Reduction.get_enum(reduction)
  2231     if input1.dim() == 0 
or input2.dim() == 0 
or target.dim() == 0:
  2232         raise RuntimeError((
"margin_ranking_loss does not support scalars, got sizes: "  2233                             "input1: {}, input2: {}, target: {} ".format(input1.size(), input2.size(), target.size())))
  2234     return torch.margin_ranking_loss(input1, input2, target, margin, reduction_enum)
  2238 def hinge_embedding_loss(input, target, margin=1.0, size_average=None,
  2239                          reduce=
None, reduction=
'mean'):
  2241     r"""hinge_embedding_loss(input, target, margin=1.0, size_average=None, reduce=None, reduction='mean') -> Tensor  2243     See :class:`~torch.nn.HingeEmbeddingLoss` for details.  2245     if size_average 
is not None or reduce 
is not None:
  2246         reduction_enum = _Reduction.legacy_get_enum(size_average, reduce)
  2248         reduction_enum = _Reduction.get_enum(reduction)
  2249     return torch.hinge_embedding_loss(input, target, margin, reduction_enum)
  2253 def multilabel_margin_loss(input, target, size_average=None, reduce=None, reduction='mean'):
  2255     r"""multilabel_margin_loss(input, target, size_average=None, reduce=None, reduction='mean') -> Tensor  2257     See :class:`~torch.nn.MultiLabelMarginLoss` for details.  2259     if size_average 
is not None or reduce 
is not None:
  2260         reduction_enum = _Reduction.legacy_get_enum(size_average, reduce)
  2262         reduction_enum = _Reduction.get_enum(reduction)
  2263     return torch._C._nn.multilabel_margin_loss(input, target, reduction_enum)
  2267 def soft_margin_loss(input, target, size_average=None, reduce=None, reduction='mean'):
  2269     r"""soft_margin_loss(input, target, size_average=None, reduce=None, reduction='mean') -> Tensor  2271     See :class:`~torch.nn.SoftMarginLoss` for details.  2273     if size_average 
is not None or reduce 
is not None:
  2274         reduction_enum = _Reduction.legacy_get_enum(size_average, reduce)
  2276         reduction_enum = _Reduction.get_enum(reduction)
  2277     return torch._C._nn.soft_margin_loss(input, target, reduction_enum)
  2281 def multilabel_soft_margin_loss(input, target, weight=None, size_average=None,
  2282                                 reduce=
None, reduction=
'mean'):
  2284     r"""multilabel_soft_margin_loss(input, target, weight=None, size_average=None) -> Tensor  2286     See :class:`~torch.nn.MultiLabelSoftMarginLoss` for details.  2288     if size_average 
is not None or reduce 
is not None:
  2289         reduction = _Reduction.legacy_get_string(size_average, reduce)
  2291     loss = -(target * logsigmoid(input) + (1 - target) * logsigmoid(-input))
  2293     if weight 
is not None:
  2294         loss = loss * weight
  2296     loss = loss.sum(dim=1) / input.size(1)  
  2298     if reduction == 
'none':
  2300     elif reduction == 
'mean':
  2302     elif reduction == 
'sum':
  2306         raise ValueError(reduction + 
" is not valid")
  2311 def cosine_embedding_loss(input1, input2, target, margin=0, size_average=None,
  2312                           reduce=
None, reduction=
'mean'):
  2314     r"""cosine_embedding_loss(input1, input2, target, margin=0, size_average=None, reduce=None, reduction='mean') -> Tensor  2316     See :class:`~torch.nn.CosineEmbeddingLoss` for details.  2318     if size_average 
is not None or reduce 
is not None:
  2319         reduction_enum = _Reduction.legacy_get_enum(size_average, reduce)
  2321         reduction_enum = _Reduction.get_enum(reduction)
  2322     return torch.cosine_embedding_loss(input1, input2, target, margin, reduction_enum)
  2326 def multi_margin_loss(input, target, p=1, margin=1., weight=None, size_average=None,
  2327                       reduce=
None, reduction=
'mean'):
  2329     r"""multi_margin_loss(input, target, p=1, margin=1, weight=None, size_average=None,  2330                           reduce=None, reduction='mean') -> Tensor  2332     See :class:`~torch.nn.MultiMarginLoss` for details.  2334     if size_average 
is not None or reduce 
is not None:
  2335         reduction_enum = _Reduction.legacy_get_enum(size_average, reduce)
  2337         reduction_enum = _Reduction.get_enum(reduction)
  2338     if p != 1 
and p != 2:
  2339         raise ValueError(
'only p == 1 and p == 2 supported')
  2340     if weight 
is not None:
  2341         if weight.dim() != 1:
  2342             raise ValueError(
'weight must be one-dimensional')
  2344     return torch._C._nn.multi_margin_loss(input, target, p, margin, weight, reduction_enum)
  2347 pixel_shuffle = _add_docstr(torch.pixel_shuffle, 
r"""  2348 Rearranges elements in a tensor of shape :math:`(*, C \times r^2, H, W)` to a  2349 tensor of shape :math:`(*, C, H \times r, W \times r)`.  2351 See :class:`~torch.nn.PixelShuffle` for details.  2354     input (Tensor): the input tensor  2355     upscale_factor (int): factor to increase spatial resolution by  2359     >>> input = torch.randn(1, 9, 4, 4)  2360     >>> output = torch.nn.functional.pixel_shuffle(input, 3)  2361     >>> print(output.size())  2362     torch.Size([1, 1, 12, 12])  2366 def upsample(input, size=None, scale_factor=None, mode='nearest', align_corners=None):
  2367     r"""Upsamples the input to either the given :attr:`size` or the given  2368     :attr:`scale_factor`  2371         This function is deprecated in favor of :func:`torch.nn.functional.interpolate`.  2372         This is equivalent with ``nn.functional.interpolate(...)``.  2374     .. include:: cuda_deterministic_backward.rst  2376     The algorithm used for upsampling is determined by :attr:`mode`.  2378     Currently temporal, spatial and volumetric upsampling are supported, i.e.  2379     expected inputs are 3-D, 4-D or 5-D in shape.  2381     The input dimensions are interpreted in the form:  2382     `mini-batch x channels x [optional depth] x [optional height] x width`.  2384     The modes available for upsampling are: `nearest`, `linear` (3D-only),  2385     `bilinear`, `bicubic` (4D-only), `trilinear` (5D-only)  2388         input (Tensor): the input tensor  2389         size (int or Tuple[int] or Tuple[int, int] or Tuple[int, int, int]):  2390             output spatial size.  2391         scale_factor (float or Tuple[float]): multiplier for spatial size. Has to be an integer.  2392         mode (string): algorithm used for upsampling:  2393             ``'nearest'`` | ``'linear'`` | ``'bilinear'`` | ``'bicubic'`` |  2394             ``'trilinear'``. Default: ``'nearest'``  2395         align_corners (bool, optional): Geometrically, we consider the pixels of the  2396             input and output as squares rather than points.  2397             If set to ``True``, the input and output tensors are aligned by the  2398             center points of their corner pixels. If set to ``False``, the input and  2399             output tensors are aligned by the corner points of their corner  2400             pixels, and the interpolation uses edge value padding for out-of-boundary values.  2401             This only has effect when :attr:`mode` is ``'linear'``,  2402             ``'bilinear'``, ``'bicubic'`` or ``'trilinear'``.  
2406         With ``align_corners = True``, the linearly interpolating modes  2407         (`linear`, `bilinear`, and `trilinear`) don't proportionally align the  2408         output and input pixels, and thus the output values can depend on the  2409         input size. This was the default behavior for these modes up to version  2410         0.3.1. Since then, the default behavior is ``align_corners = False``.  2411         See :class:`~torch.nn.Upsample` for concrete examples on how this  2412         affects the outputs.  2415     warnings.warn(
"nn.functional.upsample is deprecated. Use nn.functional.interpolate instead.")
  2416     return interpolate(input, size, scale_factor, mode, align_corners)
  2419 def interpolate(input, size=None, scale_factor=None, mode='nearest', align_corners=None):
  2420     r"""Down/up samples the input to either the given :attr:`size` or the given  2421     :attr:`scale_factor`  2423     The algorithm used for interpolation is determined by :attr:`mode`.  2425     Currently temporal, spatial and volumetric sampling are supported, i.e.  2426     expected inputs are 3-D, 4-D or 5-D in shape.  2428     The input dimensions are interpreted in the form:  2429     `mini-batch x channels x [optional depth] x [optional height] x width`.  2431     The modes available for resizing are: `nearest`, `linear` (3D-only),  2432     `bilinear`, `bicubic` (4D-only), `trilinear` (5D-only), `area`  2435         input (Tensor): the input tensor  2436         size (int or Tuple[int] or Tuple[int, int] or Tuple[int, int, int]):  2437             output spatial size.  2438         scale_factor (float or Tuple[float]): multiplier for spatial size. Has to match input size if it is a tuple.  2439         mode (str): algorithm used for upsampling:  2440             ``'nearest'`` | ``'linear'`` | ``'bilinear'`` | ``'bicubic'`` |  2441             ``'trilinear'`` | ``'area'``. Default: ``'nearest'``  2442         align_corners (bool, optional): Geometrically, we consider the pixels of the  2443             input and output as squares rather than points.  2444             If set to ``True``, the input and output tensors are aligned by the  2445             center points of their corner pixels. If set to ``False``, the input and  2446             output tensors are aligned by the corner points of their corner  2447             pixels, and the interpolation uses edge value padding for out-of-boundary values.  2448             This only has effect when :attr:`mode` is ``'linear'``,  2449             ``'bilinear'``, ``'bicubic'``, or ``'trilinear'``.  
2453         With ``align_corners = True``, the linearly interpolating modes  2454         (`linear`, `bilinear`, and `trilinear`) don't proportionally align the  2455         output and input pixels, and thus the output values can depend on the  2456         input size. This was the default behavior for these modes up to version  2457         0.3.1. Since then, the default behavior is ``align_corners = False``.  2458         See :class:`~torch.nn.Upsample` for concrete examples on how this  2459         affects the outputs.  2461     .. include:: cuda_deterministic_backward.rst  2463     from numbers 
import Integral
  2464     from .modules.utils 
import _ntuple
  2466     def _check_size_scale_factor(dim):
  2467         if size 
is None and scale_factor 
is None:
  2468             raise ValueError(
'either size or scale_factor should be defined')
  2469         if size 
is not None and scale_factor 
is not None:
  2470             raise ValueError(
'only one of size or scale_factor should be defined')
  2471         if scale_factor 
is not None and isinstance(scale_factor, tuple)\
  2472                 and len(scale_factor) != dim:
  2473             raise ValueError(
'scale_factor shape must match input shape. '  2474                              'Input is {}D, scale_factor size is {}'.format(dim, len(scale_factor)))
  2476     def _output_size(dim):
  2477         _check_size_scale_factor(dim)
  2478         if size 
is not None:
  2480         scale_factors = _ntuple(dim)(scale_factor)
  2482         return [int(math.floor(input.size(i + 2) * scale_factors[i])) 
for i 
in range(dim)]
  2484     if mode 
in (
'nearest', 
'area'):
  2485         if align_corners 
is not None:
  2486             raise ValueError(
"align_corners option can only be set with the "  2487                              "interpolating modes: linear | bilinear | bicubic | trilinear")
  2489         if align_corners 
is None:
  2490             warnings.warn(
"Default upsampling behavior when mode={} is changed "  2491                           "to align_corners=False since 0.4.0. Please specify "  2492                           "align_corners=True if the old behavior is desired. "  2493                           "See the documentation of nn.Upsample for details.".format(mode))
  2494             align_corners = 
False  2496     if input.dim() == 3 
and mode == 
'nearest':
  2497         return torch._C._nn.upsample_nearest1d(input, _output_size(1))
  2498     elif input.dim() == 4 
and mode == 
'nearest':
  2499         return torch._C._nn.upsample_nearest2d(input, _output_size(2))
  2500     elif input.dim() == 5 
and mode == 
'nearest':
  2501         return torch._C._nn.upsample_nearest3d(input, _output_size(3))
  2502     elif input.dim() == 3 
and mode == 
'area':
  2503         return adaptive_avg_pool1d(input, _output_size(1))
  2504     elif input.dim() == 4 
and mode == 
'area':
  2505         return adaptive_avg_pool2d(input, _output_size(2))
  2506     elif input.dim() == 5 
and mode == 
'area':
  2507         return adaptive_avg_pool3d(input, _output_size(3))
  2508     elif input.dim() == 3 
and mode == 
'linear':
  2509         return torch._C._nn.upsample_linear1d(input, _output_size(1), align_corners)
  2510     elif input.dim() == 3 
and mode == 
'bilinear':
  2511         raise NotImplementedError(
"Got 3D input, but bilinear mode needs 4D input")
  2512     elif input.dim() == 3 
and mode == 
'trilinear':
  2513         raise NotImplementedError(
"Got 3D input, but trilinear mode needs 5D input")
  2514     elif input.dim() == 4 
and mode == 
'linear':
  2515         raise NotImplementedError(
"Got 4D input, but linear mode needs 3D input")
  2516     elif input.dim() == 4 
and mode == 
'bilinear':
  2517         return torch._C._nn.upsample_bilinear2d(input, _output_size(2), align_corners)
  2518     elif input.dim() == 4 
and mode == 
'trilinear':
  2519         raise NotImplementedError(
"Got 4D input, but trilinear mode needs 5D input")
  2520     elif input.dim() == 5 
and mode == 
'linear':
  2521         raise NotImplementedError(
"Got 5D input, but linear mode needs 3D input")
  2522     elif input.dim() == 5 
and mode == 
'bilinear':
  2523         raise NotImplementedError(
"Got 5D input, but bilinear mode needs 4D input")
  2524     elif input.dim() == 5 
and mode == 
'trilinear':
  2525         return torch._C._nn.upsample_trilinear3d(input, _output_size(3), align_corners)
  2526     elif input.dim() == 4 
and mode == 
'bicubic':
  2527         return torch._C._nn.upsample_bicubic2d(input, _output_size(2), align_corners)
  2529         raise NotImplementedError(
"Input Error: Only 3D, 4D and 5D input Tensors supported"  2530                                   " (got {}D) for the modes: nearest | linear | bilinear | bicubic | trilinear"  2531                                   " (got {})".format(input.dim(), mode))
  2534 def upsample_nearest(input, size=None, scale_factor=None):
  2535     r"""Upsamples the input, using nearest neighbours' pixel values.  2538         This function is deprecated in favor of :func:`torch.nn.functional.interpolate`.  2539         This is equivalent with ``nn.functional.interpolate(..., mode='nearest')``.  2541     Currently spatial and volumetric upsampling are supported (i.e. expected  2542     inputs are 4 or 5 dimensional).  2545         input (Tensor): input  2546         size (int or Tuple[int, int] or Tuple[int, int, int]): output spatia  2548         scale_factor (int): multiplier for spatial size. Has to be an integer.  2550     .. include:: cuda_deterministic_backward.rst  2553     warnings.warn(
"nn.functional.upsample_nearest is deprecated. Use nn.functional.interpolate instead.")
  2554     return interpolate(input, size, scale_factor, mode=
'nearest')
  2557 def upsample_bilinear(input, size=None, scale_factor=None):
  2558     r"""Upsamples the input, using bilinear upsampling.  2561         This function is deprecated in favor of :func:`torch.nn.functional.interpolate`.  2562         This is equivalent with  2563         ``nn.functional.interpolate(..., mode='bilinear', align_corners=True)``.  2565     Expected inputs are spatial (4 dimensional). Use `upsample_trilinear` fo  2566     volumetric (5 dimensional) inputs.  2569         input (Tensor): input  2570         size (int or Tuple[int, int]): output spatial size.  2571         scale_factor (int or Tuple[int, int]): multiplier for spatial size  2573     .. include:: cuda_deterministic_backward.rst  2576     warnings.warn(
"nn.functional.upsample_bilinear is deprecated. Use nn.functional.interpolate instead.")
  2577     return interpolate(input, size, scale_factor, mode=
'bilinear', align_corners=
True)
  2580 GRID_SAMPLE_INTERPOLATION_MODES = {
  2585 GRID_SAMPLE_PADDING_MODES = {
  2593 def grid_sample(input, grid, mode='bilinear', padding_mode='zeros'):
  2595     r"""Given an :attr:`input` and a flow-field :attr:`grid`, computes the  2596     ``output`` using :attr:`input` values and pixel locations from :attr:`grid`.  2598     Currently, only spatial (4-D) and volumetric (5-D) :attr:`input` are  2601     In the spatial (4-D) case, for :attr:`input` with shape  2602     :math:`(N, C, H_\text{in}, W_\text{in})` and :attr:`grid` with shape  2603     :math:`(N, H_\text{out}, W_\text{out}, 2)`, the output will have shape  2604     :math:`(N, C, H_\text{out}, W_\text{out})`.  2606     For each output location ``output[n, :, h, w]``, the size-2 vector  2607     ``grid[n, h, w]`` specifies :attr:`input` pixel locations ``x`` and ``y``,  2608     which are used to interpolate the output value ``output[n, :, h, w]``.  2609     In the case of 5D inputs, ``grid[n, d, h, w]`` specifies the  2610     ``x``, ``y``, ``z`` pixel locations for interpolating  2611     ``output[n, :, d, h, w]``. :attr:`mode` argument specifies ``nearest`` or  2612     ``bilinear`` interpolation method to sample the input pixels.  2614     :attr:`grid` should have most values in the range of ``[-1, 1]``. This is  2615     because the pixel locations are normalized by the :attr:`input` spatial  2616     dimensions. For example, values ``x = -1, y = -1`` is the left-top pixel of  2617     :attr:`input`, and values  ``x = 1, y = 1`` is the right-bottom pixel of  2620     If :attr:`grid` has values outside the range of ``[-1, 1]``, those locations  2621     are handled as defined by :attr:`padding_mode`. Options are  2623         * ``padding_mode="zeros"``: use ``0`` for out-of-bound values,  2624         * ``padding_mode="border"``: use border values for out-of-bound values,  2625         * ``padding_mode="reflection"``: use values at locations reflected by  2626           the border for out-of-bound values. 
For location far away from the  2627           border, it will keep being reflected until becoming in bound, e.g.,  2628           (normalized) pixel location ``x = -3.5`` reflects by ``-1`` and  2629           becomes ``x' = 1.5``, then reflects by border ``1`` and becomes  2632     .. Note:: This function is often used in building Spatial Transformer Networks.  2633     .. include:: cuda_deterministic_backward.rst  2636         input (Tensor): input of shape :math:`(N, C, H_\text{in}, W_\text{in})` (4-D case)  2637                         or :math:`(N, C, D_\text{in}, H_\text{in}, W_\text{in})` (5-D case)  2638         grid (Tensor): flow-field of shape :math:`(N, H_\text{out}, W_\text{out}, 2)` (4-D case)  2639                        or :math:`(N, D_\text{out}, H_\text{out}, W_\text{out}, 3)` (5-D case)  2640         mode (str): interpolation mode to calculate output values  2641             ``'bilinear'`` | ``'nearest'``. Default: ``'bilinear'``  2642         padding_mode (str): padding mode for outside grid values  2643             ``'zeros'`` | ``'border'`` | ``'reflection'``. Default: ``'zeros'``  2646         output (Tensor): output Tensor  2649     if mode != 
'bilinear' and mode != 
'nearest':
  2650         raise ValueError(
"nn.functional.grid_sample(): expected mode to be "  2651                          "'bilinear' or 'nearest', but got: '{}'".format(mode))
  2652     if padding_mode != 
'zeros' and padding_mode != 
'border' and padding_mode != 
'reflection':
  2653         raise ValueError(
"nn.functional.grid_sample(): expected padding_mode "  2654                          "to be 'zeros', 'border', or 'reflection', "  2655                          "but got: '{}'".format(padding_mode))
  2657     if mode == 
'bilinear':
  2662     if padding_mode == 
'zeros':
  2663         padding_mode_enum = 0
  2664     elif padding_mode == 
'border':
  2665         padding_mode_enum = 1
  2667         padding_mode_enum = 2
  2669     return torch.grid_sampler(input, grid, mode_enum, padding_mode_enum)
  2673 def affine_grid(theta, size):
  2675     r"""Generates a 2d flow field, given a batch of affine matrices :attr:`theta`.  2676     Generally used in conjunction with :func:`grid_sample` to  2677     implement Spatial Transformer Networks.  2680         theta (Tensor): input batch of affine matrices (:math:`N \times 2 \times 3`)  2681         size (torch.Size): the target output image size (:math:`N \times C \times H \times W`).  2682             Example: torch.Size((32, 3, 24, 24))  2685         output (Tensor): output Tensor of size (:math:`N \times H \times W \times 2`)  2687     return vision.affine_grid_generator(theta, size)
  2691 def pad(input, pad, mode='constant', value=0):
  2696         The padding size by which to pad some dimensions of :attr:`input`  2697         are described starting from the last dimension and moving forward.  2698         :math:`\left\lfloor\frac{\text{len(pad)}}{2}\right\rfloor` dimensions  2699         of ``input`` will be padded.  2700         For example, to pad only the last dimension of the input tensor, then  2701         :attr:`pad` has the form  2702         :math:`(\text{padding\_left}, \text{padding\_right})`;  2703         to pad the last 2 dimensions of the input tensor, then use  2704         :math:`(\text{padding\_left}, \text{padding\_right},`  2705         :math:`\text{padding\_top}, \text{padding\_bottom})`;  2706         to pad the last 3 dimensions, use  2707         :math:`(\text{padding\_left}, \text{padding\_right},`  2708         :math:`\text{padding\_top}, \text{padding\_bottom}`  2709         :math:`\text{padding\_front}, \text{padding\_back})`.  2712         See :class:`torch.nn.ConstantPad2d`, :class:`torch.nn.ReflectionPad2d`, and  2713         :class:`torch.nn.ReplicationPad2d` for concrete examples on how each of the  2714         padding modes works. Constant padding is implemented for arbitrary dimensions.  2715         Replicate padding is implemented for padding the last 3 dimensions of 5D input  2716         tensor, or the last 2 dimensions of 4D input tensor, or the last dimension of  2717         3D input tensor. Reflect padding is only implemented for padding the last 2  2718         dimensions of 4D input tensor, or the last dimension of 3D input tensor.  2720     .. include:: cuda_deterministic_backward.rst  2723         input (Tensor): N-dimensional tensor  2724         pad (tuple): m-elements tuple, where  2725             :math:`\frac{m}{2} \leq` input dimensions and :math:`m` is even.  2726         mode: ``'constant'``, ``'reflect'``, ``'replicate'`` or ``'circular'``.  
2727             Default: ``'constant'``  2728         value: fill value for ``'constant'`` padding. Default: ``0``  2732         >>> t4d = torch.empty(3, 3, 4, 2)  2733         >>> p1d = (1, 1) # pad last dim by 1 on each side  2734         >>> out = F.pad(t4d, p1d, "constant", 0)  # effectively zero padding  2735         >>> print(out.data.size())  2736         torch.Size([3, 3, 4, 4])  2737         >>> p2d = (1, 1, 2, 2) # pad last dim by (1, 1) and 2nd to last by (2, 2)  2738         >>> out = F.pad(t4d, p2d, "constant", 0)  2739         >>> print(out.data.size())  2740         torch.Size([3, 3, 8, 4])  2741         >>> t4d = torch.empty(3, 3, 4, 2)  2742         >>> p3d = (0, 1, 2, 1, 3, 3) # pad by (0, 1), (2, 1), and (3, 3)  2743         >>> out = F.pad(t4d, p3d, "constant", 0)  2744         >>> print(out.data.size())  2745         torch.Size([3, 9, 7, 3])  2748     assert len(pad) % 2 == 0, 
'Padding length must be divisible by 2'  2749     assert len(pad) // 2 <= input.dim(), 
'Padding length too large'  2750     if mode == 
'constant':
  2751         ret = _VF.constant_pad_nd(input, pad, value)
  2753         assert value == 0, 
'Padding mode "{}"" doesn\'t take in value argument'.format(mode)
  2754         if input.dim() == 3:
  2755             assert len(pad) == 2, 
'3D tensors expect 2 values for padding'  2756             if mode == 
'reflect':
  2757                 ret = torch._C._nn.reflection_pad1d(input, pad)
  2758             elif mode == 
'replicate':
  2759                 ret = torch._C._nn.replication_pad1d(input, pad)
  2760             elif mode == 
'circular':
  2761                 ret = pad_circular(input, pad)
  2764                 raise NotImplementedError
  2766         elif input.dim() == 4:
  2767             assert len(pad) == 4, 
'4D tensors expect 4 values for padding'  2768             if mode == 
'reflect':
  2769                 ret = torch._C._nn.reflection_pad2d(input, pad)
  2770             elif mode == 
'replicate':
  2771                 ret = torch._C._nn.replication_pad2d(input, pad)
  2772             elif mode == 
'circular':
  2773                 ret = pad_circular(input, pad)
  2776                 raise NotImplementedError
  2778         elif input.dim() == 5:
  2779             assert len(pad) == 6, 
'5D tensors expect 6 values for padding'  2780             if mode == 
'reflect':
  2782                 raise NotImplementedError
  2783             elif mode == 
'replicate':
  2784                 ret = torch._C._nn.replication_pad3d(input, pad)
  2785             elif mode == 
'circular':
  2786                 ret = pad_circular(input, pad)
  2789                 raise NotImplementedError
  2792             raise NotImplementedError(
"Only 3D, 4D, 5D padding with non-constant padding are supported for now")
  2800 def pairwise_distance(x1, x2, p=2., eps=1e-6, keepdim=False):
  2803     See :class:`torch.nn.PairwiseDistance` for details  2805     return torch.pairwise_distance(x1, x2, p, eps, keepdim)
  2808 pdist = _add_docstr(torch.pdist, 
r"""  2809 pdist(input, p=2) -> Tensor  2811 Computes the p-norm distance between every pair of row vectors in the input.  2812 This is identical to the upper triangular portion, excluding the diagonal, of  2813 `torch.norm(input[:, None] - input, dim=2, p=p)`. This function will be faster  2814 if the rows are contiguous.  2816 If input has shape :math:`N \times M` then the output will have shape  2817 :math:`\frac{1}{2} N (N - 1)`.  2819 This function is equivalent to `scipy.spatial.distance.pdist(input,  2820 'minkowski', p=p)` if :math:`p \in (0, \infty)`. When :math:`p = 0` it is  2821 equivalent to `scipy.spatial.distance.pdist(input, 'hamming') * M`.  2822 When :math:`p = \infty`, the closest scipy function is  2823 `scipy.spatial.distance.pdist(xn, lambda x, y: np.abs(x - y).max())`.  2826     input: input tensor of shape :math:`N \times M`.  2827     p: p value for the p-norm distance to calculate between each vector pair  2828         :math:`\in [0, \infty]`.  2832 cosine_similarity = _add_docstr(torch.cosine_similarity, 
r"""  2833 cosine_similarity(x1, x2, dim=1, eps=1e-8) -> Tensor  2835 Returns cosine similarity between x1 and x2, computed along dim.  2838     \text{similarity} = \dfrac{x_1 \cdot x_2}{\max(\Vert x_1 \Vert _2 \cdot \Vert x_2 \Vert _2, \epsilon)}  2841     x1 (Tensor): First input.  2842     x2 (Tensor): Second input (of size matching x1).  2843     dim (int, optional): Dimension of vectors. Default: 1  2844     eps (float, optional): Small value to avoid division by zero.  2848     - Input: :math:`(\ast_1, D, \ast_2)` where D is at position `dim`.  2849     - Output: :math:`(\ast_1, \ast_2)` where 1 is at position `dim`.  2853     >>> input1 = torch.randn(100, 128)  2854     >>> input2 = torch.randn(100, 128)  2855     >>> output = F.cosine_similarity(input1, input2)  2860 one_hot = _add_docstr(torch._C._nn.one_hot, 
r"""  2861 one_hot(tensor, num_classes=0) -> LongTensor  2863 Takes LongTensor with index values of shape ``(*)`` and returns a tensor  2864 of shape ``(*, num_classes)`` that have zeros everywhere except where the  2865 index of last dimension matches the corresponding value of the input tensor,  2866 in which case it will be 1.  2868 See also `One-hot on Wikipedia`_ .  2870 .. _One-hot on Wikipedia:  2871     https://en.wikipedia.org/wiki/One-hot  2874     tensor (LongTensor): class values of any shape.  2875     num_classes (int):  Total number of classes. If set to -1, the number  2876         of classes will be inferred as one greater than the largest class  2877         value in the input tensor.  2880     LongTensor that has one more dimension with 1 values at the  2881     index of last dimension indicated by the input, and 0 everywhere  2885     >>> F.one_hot(torch.arange(0, 5) % 3)  2891     >>> F.one_hot(torch.arange(0, 5) % 3, num_classes=5)  2892     tensor([[1, 0, 0, 0, 0],  2897     >>> F.one_hot(torch.arange(0, 6).view(3,2) % 3)  2908 def triplet_margin_loss(anchor, positive, negative, margin=1.0, p=2, eps=1e-6, swap=False, size_average=None,
  2909                         reduce=
None, reduction=
"mean"):
  2912     See :class:`~torch.nn.TripletMarginLoss` for details  2914     if size_average 
is not None or reduce 
is not None:
  2915         reduction_enum = _Reduction.legacy_get_enum(size_average, reduce)
  2917         reduction_enum = _Reduction.get_enum(reduction)
  2918     return torch.triplet_margin_loss(anchor, positive, negative, margin, p, eps,
  2919                                      swap, reduction_enum)
  2923 def normalize(input, p=2, dim=1, eps=1e-12, out=None):
  2925     r"""Performs :math:`L_p` normalization of inputs over specified dimension.  2927     For a tensor :attr:`input` of sizes :math:`(n_0, ..., n_{dim}, ..., n_k)`, each  2928     :math:`n_{dim}` -element vector :math:`v` along dimension :attr:`dim` is transformed as  2931         v = \frac{v}{\max(\lVert v \rVert_p, \epsilon)}.  2933     With the default arguments it uses the Euclidean norm over vectors along dimension :math:`1` for normalization.  2936         input: input tensor of any shape  2937         p (float): the exponent value in the norm formulation. Default: 2  2938         dim (int): the dimension to reduce. Default: 1  2939         eps (float): small value to avoid division by zero. Default: 1e-12  2940         out (Tensor, optional): the output tensor. If :attr:`out` is used, this  2941                                 operation won't be differentiable.  2944         denom = input.norm(p, dim, 
True).clamp_min(eps).expand_as(input)
  2947         denom = input.norm(p, dim, 
True).clamp_min(eps).expand_as(input)
  2948         ret = torch.div(input, denom, out=out)
  2952 def assert_int_or_pair(arg, arg_name, message):
  2953     assert isinstance(arg, int) 
or len(arg) == 2, message.format(arg_name)
  2957 def unfold(input, kernel_size, dilation=1, padding=0, stride=1):
  2959     r"""Extracts sliding local blocks from an batched input tensor.  2962         Currently, only 4-D input tensors (batched image-like tensors) are  2967         More than one element of the unfolded tensor may refer to a single  2968         memory location. As a result, in-place operations (especially ones that  2969         are vectorized) may result in incorrect behavior. If you need to write  2970         to the tensor, please clone it first.  2973     See :class:`torch.nn.Unfold` for details  2976     if input.dim() == 4:
  2977         msg = 
'{} must be int or 2-tuple for 4D input'  2978         assert_int_or_pair(kernel_size, 
'kernel_size', msg)
  2979         assert_int_or_pair(dilation, 
'dilation', msg)
  2980         assert_int_or_pair(padding, 
'padding', msg)
  2981         assert_int_or_pair(stride, 
'stride', msg)
  2983         ret = torch._C._nn.thnn_im2col(input, _pair(kernel_size),
  2984                                        _pair(dilation), _pair(padding), _pair(stride))
  2986         raise NotImplementedError(
"Input Error: Only 4D input Tensors are supported (got {}D)".format(input.dim()))
  2992 def fold(input, output_size, kernel_size, dilation=1, padding=0, stride=1):
  2994     r"""Combines an array of sliding local blocks into a large containing  2998         Currently, only 4-D output tensors (batched image-like tensors) are  3001     See :class:`torch.nn.Fold` for details  3003     if input.dim() == 3:
  3004         msg = 
'{} must be int or 2-tuple for 3D input'  3005         assert_int_or_pair(output_size, 
'output_size', msg)
  3006         assert_int_or_pair(kernel_size, 
'kernel_size', msg)
  3007         assert_int_or_pair(dilation, 
'dilation', msg)
  3008         assert_int_or_pair(padding, 
'padding', msg)
  3009         assert_int_or_pair(stride, 
'stride', msg)
  3011         ret = torch._C._nn.thnn_col2im(input, _pair(output_size), _pair(kernel_size),
  3012                                        _pair(dilation), _pair(padding), _pair(stride))
  3014         raise NotImplementedError(
"Input Error: Only 3D input Tensors are supported (got {}D)".format(input.dim()))
  3020 def pad_circular(input, padding):
  3024         :param input: tensor of shape :math:`(N, C_{\text{in}}, H, [W, D]))`  3025         :param padding: (tuple): m-elem tuple where m is the degree of convolution  3027         :return: tensor of shape :math:`(N, C_{\text{in}}, [D + 2 * padding[0],  3028                  H + 2 * padding[1]], W + 2 * padding[2]))`  3031     input = torch.cat([input, input[:, :, 0:padding[-1]]], dim=2)
  3032     input = torch.cat([input[:, :, -(padding[-1] + padding[-2]):-padding[-1]], input], dim=2)
  3034     if len(padding) > 2:
  3035         input = torch.cat([input, input[:, :, :, 0:padding[-3]]], dim=3)
  3036         input = torch.cat([input[:, :, :, -(padding[-3] + padding[-4]):-padding[-3]], input], dim=3)
  3038     if len(padding) > 4:
  3039         input = torch.cat([input, input[:, :, :, :, 0:padding[-5]]], dim=4)
  3040         input = torch.cat([input[:, :, :, :, -(padding[-5] + padding[-6]):-padding[-5]], input], dim=4)
 
def annotate(the_type, the_value)
 
Module caffe2.python.helpers.dropout. 
 
def boolean_dispatch(arg_name, arg_index, default, if_true, if_false, module_name, func_name)