1 r"""Functional interface""" 2 from __future__
import division
9 from torch._C import _infer_size, _add_docstr
10 from .
import _reduction
as _Reduction
11 from .
import _functions
12 from .modules
import utils
13 from ._functions
import vision
14 from .modules.utils
import _single, _pair, _triple, _list_with_default
17 from .._jit_internal
import weak_script, List
20 conv1d = _add_docstr(torch.conv1d,
r""" 21 conv1d(input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1, padding_mode='zeros') -> Tensor 23 Applies a 1D convolution over an input signal composed of several input 26 See :class:`~torch.nn.Conv1d` for details and output shape. 28 .. include:: cudnn_deterministic.rst 31 input: input tensor of shape :math:`(\text{minibatch} , \text{in\_channels} , iW)` 32 weight: filters of shape :math:`(\text{out\_channels} , \frac{\text{in\_channels}}{\text{groups}} , kW)` 33 bias: optional bias of shape :math:`(\text{out\_channels})`. Default: ``None`` 34 stride: the stride of the convolving kernel. Can be a single number or 35 a one-element tuple `(sW,)`. Default: 1 36 padding: implicit paddings on both sides of the input. Can be a 37 single number or a one-element tuple `(padW,)`. Default: 0 38 dilation: the spacing between kernel elements. Can be a single number or 39 a one-element tuple `(dW,)`. Default: 1 40 groups: split input into groups, :math:`\text{in\_channels}` should be divisible by 41 the number of groups. Default: 1 42 padding_mode: the type of paddings applied to both sided can be: `zeros` or `circular`. Default: `zeros` 46 >>> filters = torch.randn(33, 16, 3) 47 >>> inputs = torch.randn(20, 16, 50) 48 >>> F.conv1d(inputs, filters) 51 conv2d = _add_docstr(torch.conv2d,
r""" 52 conv2d(input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1, padding_mode='zeros') -> Tensor 54 Applies a 2D convolution over an input image composed of several input 57 See :class:`~torch.nn.Conv2d` for details and output shape. 59 .. include:: cudnn_deterministic.rst 62 input: input tensor of shape :math:`(\text{minibatch} , \text{in\_channels} , iH , iW)` 63 weight: filters of shape :math:`(\text{out\_channels} , \frac{\text{in\_channels}}{\text{groups}} , kH , kW)` 64 bias: optional bias tensor of shape :math:`(\text{out\_channels})`. Default: ``None`` 65 stride: the stride of the convolving kernel. Can be a single number or a 66 tuple `(sH, sW)`. Default: 1 67 padding: implicit paddings on both sides of the input. Can be a 68 single number or a tuple `(padH, padW)`. Default: 0 69 dilation: the spacing between kernel elements. Can be a single number or 70 a tuple `(dH, dW)`. Default: 1 71 groups: split input into groups, :math:`\text{in\_channels}` should be divisible by the 72 number of groups. Default: 1 73 padding_mode: the type of paddings applied to both sided can be: `zeros` or `circular`. Default: `zeros` 77 >>> # With square kernels and equal stride 78 >>> filters = torch.randn(8,4,3,3) 79 >>> inputs = torch.randn(1,4,5,5) 80 >>> F.conv2d(inputs, filters, padding=1) 83 conv3d = _add_docstr(torch.conv3d,
r""" 84 conv3d(input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1, padding_mode='zeros') -> Tensor 86 Applies a 3D convolution over an input image composed of several input 89 See :class:`~torch.nn.Conv3d` for details and output shape. 91 .. include:: cudnn_deterministic.rst 94 input: input tensor of shape :math:`(\text{minibatch} , \text{in\_channels} , iT , iH , iW)` 95 weight: filters of shape :math:`(\text{out\_channels} , \frac{\text{in\_channels}}{\text{groups}} , kT , kH , kW)` 96 bias: optional bias tensor of shape :math:`(\text{out\_channels})`. Default: None 97 stride: the stride of the convolving kernel. Can be a single number or a 98 tuple `(sT, sH, sW)`. Default: 1 99 padding: implicit paddings on both sides of the input. Can be a 100 single number or a tuple `(padT, padH, padW)`. Default: 0 101 dilation: the spacing between kernel elements. Can be a single number or 102 a tuple `(dT, dH, dW)`. Default: 1 103 groups: split input into groups, :math:`\text{in\_channels}` should be divisible by 104 the number of groups. Default: 1 105 padding_mode: the type of paddings applied to both sided can be: `zeros` or `circular`. Default: `zeros` 109 >>> filters = torch.randn(33, 16, 3, 3, 3) 110 >>> inputs = torch.randn(20, 16, 50, 10, 20) 111 >>> F.conv3d(inputs, filters) 114 conv_transpose1d = _add_docstr(torch.conv_transpose1d,
r""" 115 conv_transpose1d(input, weight, bias=None, stride=1, padding=0, output_padding=0, groups=1, dilation=1) -> Tensor 117 Applies a 1D transposed convolution operator over an input signal 118 composed of several input planes, sometimes also called "deconvolution". 120 See :class:`~torch.nn.ConvTranspose1d` for details and output shape. 122 .. include:: cudnn_deterministic.rst 125 input: input tensor of shape :math:`(\text{minibatch} , \text{in\_channels} , iW)` 126 weight: filters of shape :math:`(\text{in\_channels} , \frac{\text{out\_channels}}{\text{groups}} , kW)` 127 bias: optional bias of shape :math:`(\text{out\_channels})`. Default: None 128 stride: the stride of the convolving kernel. Can be a single number or a 129 tuple ``(sW,)``. Default: 1 130 padding: ``dilation * (kernel_size - 1) - padding`` zero-padding will be added to both 131 sides of each dimension in the input. Can be a single number or a tuple 132 ``(padW,)``. Default: 0 133 output_padding: additional size added to one side of each dimension in the 134 output shape. Can be a single number or a tuple ``(out_padW)``. Default: 0 135 groups: split input into groups, :math:`\text{in\_channels}` should be divisible by the 136 number of groups. Default: 1 137 dilation: the spacing between kernel elements. Can be a single number or 138 a tuple ``(dW,)``. Default: 1 142 >>> inputs = torch.randn(20, 16, 50) 143 >>> weights = torch.randn(16, 33, 5) 144 >>> F.conv_transpose1d(inputs, weights) 147 conv_transpose2d = _add_docstr(torch.conv_transpose2d,
r""" 148 conv_transpose2d(input, weight, bias=None, stride=1, padding=0, output_padding=0, groups=1, dilation=1) -> Tensor 150 Applies a 2D transposed convolution operator over an input image 151 composed of several input planes, sometimes also called "deconvolution". 153 See :class:`~torch.nn.ConvTranspose2d` for details and output shape. 155 .. include:: cudnn_deterministic.rst 158 input: input tensor of shape :math:`(\text{minibatch} , \text{in\_channels} , iH , iW)` 159 weight: filters of shape :math:`(\text{in\_channels} , \frac{\text{out\_channels}}{\text{groups}} , kH , kW)` 160 bias: optional bias of shape :math:`(\text{out\_channels})`. Default: None 161 stride: the stride of the convolving kernel. Can be a single number or a 162 tuple ``(sH, sW)``. Default: 1 163 padding: ``dilation * (kernel_size - 1) - padding`` zero-padding will be added to both 164 sides of each dimension in the input. Can be a single number or a tuple 165 ``(padH, padW)``. Default: 0 166 output_padding: additional size added to one side of each dimension in the 167 output shape. Can be a single number or a tuple ``(out_padH, out_padW)``. 169 groups: split input into groups, :math:`\text{in\_channels}` should be divisible by the 170 number of groups. Default: 1 171 dilation: the spacing between kernel elements. Can be a single number or 172 a tuple ``(dH, dW)``. Default: 1 176 >>> # With square kernels and equal stride 177 >>> inputs = torch.randn(1, 4, 5, 5) 178 >>> weights = torch.randn(4, 8, 3, 3) 179 >>> F.conv_transpose2d(inputs, weights, padding=1) 182 conv_transpose3d = _add_docstr(torch.conv_transpose3d,
r""" 183 conv_transpose3d(input, weight, bias=None, stride=1, padding=0, output_padding=0, groups=1, dilation=1) -> Tensor 185 Applies a 3D transposed convolution operator over an input image 186 composed of several input planes, sometimes also called "deconvolution" 188 See :class:`~torch.nn.ConvTranspose3d` for details and output shape. 190 .. include:: cudnn_deterministic.rst 193 input: input tensor of shape :math:`(\text{minibatch} , \text{in\_channels} , iT , iH , iW)` 194 weight: filters of shape :math:`(\text{in\_channels} , \frac{\text{out\_channels}}{\text{groups}} , kT , kH , kW)` 195 bias: optional bias of shape :math:`(\text{out\_channels})`. Default: None 196 stride: the stride of the convolving kernel. Can be a single number or a 197 tuple ``(sT, sH, sW)``. Default: 1 198 padding: ``dilation * (kernel_size - 1) - padding`` zero-padding will be added to both 199 sides of each dimension in the input. Can be a single number or a tuple 200 ``(padT, padH, padW)``. Default: 0 201 output_padding: additional size added to one side of each dimension in the 202 output shape. Can be a single number or a tuple 203 ``(out_padT, out_padH, out_padW)``. Default: 0 204 groups: split input into groups, :math:`\text{in\_channels}` should be divisible by the 205 number of groups. Default: 1 206 dilation: the spacing between kernel elements. Can be a single number or 207 a tuple `(dT, dH, dW)`. Default: 1 211 >>> inputs = torch.randn(20, 16, 50, 10, 20) 212 >>> weights = torch.randn(16, 33, 3, 3, 3) 213 >>> F.conv_transpose3d(inputs, weights) 216 conv_tbc = _add_docstr(torch.conv_tbc,
r""" 217 Applies a 1-dimensional sequence convolution over an input sequence. 218 Input and output dimensions are (Time, Batch, Channels) - hence TBC. 221 input: input tensor of shape :math:`(\text{sequence length} \times batch \times \text{in\_channels})` 222 weight: filter of shape (:math:`\text{kernel width} \times \text{in\_channels} \times \text{out\_channels}`) 223 bias: bias of shape (:math:`\text{out\_channels}`) 224 pad: number of timesteps to pad. Default: 0 229 avg_pool1d = _add_docstr(torch.avg_pool1d,
r""" 230 avg_pool1d(input, kernel_size, stride=None, padding=0, ceil_mode=False, count_include_pad=True) -> Tensor 232 Applies a 1D average pooling over an input signal composed of several 235 See :class:`~torch.nn.AvgPool1d` for details and output shape. 238 input: input tensor of shape :math:`(\text{minibatch} , \text{in\_channels} , iW)` 239 kernel_size: the size of the window. Can be a single number or a 241 stride: the stride of the window. Can be a single number or a tuple 242 `(sW,)`. Default: :attr:`kernel_size` 243 padding: implicit zero paddings on both sides of the input. Can be a 244 single number or a tuple `(padW,)`. Default: 0 245 ceil_mode: when True, will use `ceil` instead of `floor` to compute the 246 output shape. Default: ``False`` 247 count_include_pad: when True, will include the zero-padding in the 248 averaging calculation. Default: ``True`` 252 >>> # pool of square window of size=3, stride=2 253 >>> input = torch.tensor([[[1, 2, 3, 4, 5, 6, 7]]], dtype=torch.float32) 254 >>> F.avg_pool1d(input, kernel_size=3, stride=2) 255 tensor([[[ 2., 4., 6.]]]) 260 avg_pool2d = _add_docstr(torch._C._nn.avg_pool2d,
r""" 261 avg_pool2d(input, kernel_size, stride=None, padding=0, ceil_mode=False, count_include_pad=True) -> Tensor 263 Applies 2D average-pooling operation in :math:`kH \times kW` regions by step size 264 :math:`sH \times sW` steps. The number of output features is equal to the number of 267 See :class:`~torch.nn.AvgPool2d` for details and output shape. 270 input: input tensor :math:`(\text{minibatch} , \text{in\_channels} , iH , iW)` 271 kernel_size: size of the pooling region. Can be a single number or a 273 stride: stride of the pooling operation. Can be a single number or a 274 tuple `(sH, sW)`. Default: :attr:`kernel_size` 275 padding: implicit zero paddings on both sides of the input. Can be a 276 single number or a tuple `(padH, padW)`. Default: 0 277 ceil_mode: when True, will use `ceil` instead of `floor` in the formula 278 to compute the output shape. Default: ``False`` 279 count_include_pad: when True, will include the zero-padding in the 280 averaging calculation. Default: ``True`` 283 avg_pool3d = _add_docstr(torch._C._nn.avg_pool3d,
r""" 284 avg_pool3d(input, kernel_size, stride=None, padding=0, ceil_mode=False, count_include_pad=True) -> Tensor 286 Applies 3D average-pooling operation in :math:`kT \times kH \times kW` regions by step 287 size :math:`sT \times sH \times sW` steps. The number of output features is equal to 288 :math:`\lfloor\frac{\text{input planes}}{sT}\rfloor`. 290 See :class:`~torch.nn.AvgPool3d` for details and output shape. 293 input: input tensor :math:`(\text{minibatch} , \text{in\_channels} , iT \times iH , iW)` 294 kernel_size: size of the pooling region. Can be a single number or a 296 stride: stride of the pooling operation. Can be a single number or a 297 tuple `(sT, sH, sW)`. Default: :attr:`kernel_size` 298 padding: implicit zero paddings on both sides of the input. Can be a 299 single number or a tuple `(padT, padH, padW)`, Default: 0 300 ceil_mode: when True, will use `ceil` instead of `floor` in the formula 301 to compute the output shape 302 count_include_pad: when True, will include the zero-padding in the 303 averaging calculation 308 def fractional_max_pool2d_with_indices(input, kernel_size, output_size=None,
309 output_ratio=
None, return_indices=
False,
310 _random_samples=
None):
312 r"""Applies 2D fractional max pooling over an input signal composed of several input planes. 314 Fractional MaxPooling is described in detail in the paper `Fractional MaxPooling`_ by Ben Graham 316 The max-pooling operation is applied in :math:`kH \times kW` regions by a stochastic 317 step size determined by the target output size. 318 The number of output features is equal to the number of input planes. 321 kernel_size: the size of the window to take a max over. 322 Can be a single number :math:`k` (for a square kernel of :math:`k \times k`) 323 or a tuple `(kH, kW)` 324 output_size: the target output size of the image of the form :math:`oH \times oW`. 325 Can be a tuple `(oH, oW)` or a single number :math:`oH` for a square image :math:`oH \times oH` 326 output_ratio: If one wants to have an output size as a ratio of the input size, this option can be given. 327 This has to be a number or tuple in the range (0, 1) 328 return_indices: if ``True``, will return the indices along with the outputs. 329 Useful to pass to :func:`~torch.nn.functional.max_unpool2d`. 332 >>> input = torch.randn(20, 16, 50, 32) 333 >>> # pool of square window of size=3, and target output size 13x12 334 >>> F.fractional_max_pool2d(input, 3, output_size=(13, 12)) 335 >>> # pool of square window and target output size being half of input image size 336 >>> F.fractional_max_pool2d(input, 3, output_ratio=(0.5, 0.5)) 338 .. _Fractional MaxPooling: 339 http://arxiv.org/abs/1412.6071 341 if output_size
is None and output_ratio
is None:
342 raise ValueError(
"fractional_max_pool2d requires specifying either " 343 "an output_size or an output_ratio")
344 if output_size
is None:
346 output_size = [int(input.size(2) * _output_ratio[0]),
347 int(input.size(3) * _output_ratio[1])]
349 if _random_samples
is None:
350 _random_samples = torch.rand(input.size(0), input.size(1), 2, dtype=input.dtype, device=input.device)
351 return torch._C._nn.fractional_max_pool2d(input, kernel_size, output_size, _random_samples)
def _fractional_max_pool2d(input, kernel_size, output_size=None,
                           output_ratio=None, return_indices=False,
                           _random_samples=None):
    """Run 2D fractional max pooling and return only the pooled tensor.

    This is the variant registered for ``return_indices=False``: it forwards
    every argument to :func:`fractional_max_pool2d_with_indices` and keeps the
    first element of the result it returns.
    """
    return fractional_max_pool2d_with_indices(input, kernel_size, output_size,
                                              output_ratio, return_indices,
                                              _random_samples)[0]
364 arg_name=
'return_indices',
367 if_true=fractional_max_pool2d_with_indices,
368 if_false=_fractional_max_pool2d,
369 module_name=__name__,
370 func_name=
'fractional_max_pool2d')
def fractional_max_pool3d_with_indices(input, kernel_size, output_size=None,
                                       output_ratio=None, return_indices=False,
                                       _random_samples=None):
    r"""Applies 3D fractional max pooling over an input signal composed of several input planes.

    Fractional MaxPooling is described in detail in the paper `Fractional MaxPooling`_ by Ben Graham

    The max-pooling operation is applied in :math:`kT \times kH \times kW` regions by a stochastic
    step size determined by the target output size.
    The number of output features is equal to the number of input planes.

    Args:
        kernel_size: the size of the window to take a max over.
                     Can be a single number :math:`k` (for a square kernel of :math:`k \times k \times k`)
                     or a tuple `(kT, kH, kW)`
        output_size: the target output size of the form :math:`oT \times oH \times oW`.
                     Can be a tuple `(oT, oH, oW)` or a single number :math:`oH` for a cubic output
                     :math:`oH \times oH \times oH`
        output_ratio: If one wants to have an output size as a ratio of the input size, this option can be given.
                      This has to be a number or tuple in the range (0, 1)
        return_indices: if ``True``, will return the indices along with the outputs.
                        Useful to pass to :func:`~torch.nn.functional.max_unpool3d`.

    Raises:
        ValueError: if neither ``output_size`` nor ``output_ratio`` is given.

    Examples::
        >>> input = torch.randn(20, 16, 50, 32, 16)
        >>> # pool of cubic window of size=3, and target output size 13x12x11
        >>> F.fractional_max_pool3d(input, 3, output_size=(13, 12, 11))
        >>> # pool of cubic window and target output size being half of input size
        >>> F.fractional_max_pool3d(input, 3, output_ratio=(0.5, 0.5, 0.5))

    .. _Fractional MaxPooling:
        http://arxiv.org/abs/1412.6071
    """
    if output_size is None and output_ratio is None:
        raise ValueError("fractional_max_pool3d requires specifying either "
                         "an output_size or an output_ratio")
    if output_size is None:
        # Expand a scalar ratio to one entry per pooled dimension, then scale
        # the three trailing input dimensions by it.
        _output_ratio = _triple(torch.jit._unwrap_optional(output_ratio))
        output_size = [int(input.size(2) * _output_ratio[0]),
                       int(input.size(3) * _output_ratio[1]),
                       int(input.size(4) * _output_ratio[2])]

    if _random_samples is None:
        # One random triple per (dim-0, dim-1) slice, matching the input's
        # dtype and device.
        _random_samples = torch.rand(input.size(0), input.size(1), 3, dtype=input.dtype, device=input.device)
    return torch._C._nn.fractional_max_pool3d(input, kernel_size, output_size, _random_samples)
def _fractional_max_pool3d(input, kernel_size, output_size=None,
                           output_ratio=None, return_indices=False,
                           _random_samples=None):
    """Run 3D fractional max pooling and return only the pooled tensor.

    This is the variant registered for ``return_indices=False``: it forwards
    every argument to :func:`fractional_max_pool3d_with_indices` and keeps the
    first element of the result it returns.
    """
    return fractional_max_pool3d_with_indices(input, kernel_size, output_size,
                                              output_ratio, return_indices,
                                              _random_samples)[0]
432 arg_name=
'return_indices',
435 if_true=fractional_max_pool3d_with_indices,
436 if_false=_fractional_max_pool3d,
437 module_name=__name__,
438 func_name=
'fractional_max_pool3d')
def max_pool1d_with_indices(input, kernel_size, stride=None, padding=0,
                            dilation=1, ceil_mode=False, return_indices=False):
    r"""Applies a 1D max pooling over an input signal composed of several input
    planes.

    See :class:`~torch.nn.MaxPool1d` for details.

    Returns whatever :func:`torch.max_pool1d_with_indices` returns (the pooled
    values together with their indices). ``return_indices`` is accepted for
    signature compatibility with the dispatching wrapper and is not read here.
    """
    if stride is None:
        # An empty, explicitly typed list stands in for "no stride given".
        # NOTE(review): restored from a gap in the extracted source - confirm.
        stride = torch.jit.annotate(List[int], [])
    return torch.max_pool1d_with_indices(
        input, kernel_size, stride, padding, dilation, ceil_mode)
def _max_pool1d(input, kernel_size, stride=None, padding=0, dilation=1,
                ceil_mode=False, return_indices=False):
    """Run 1D max pooling and return only the pooled tensor.

    Variant registered for ``return_indices=False``: delegates to
    :func:`max_pool1d_with_indices` and drops the indices by keeping element 0
    of its result. ``return_indices`` itself is not forwarded.
    """
    return max_pool1d_with_indices(
        input, kernel_size, stride, padding, dilation, ceil_mode)[0]
464 arg_name=
'return_indices',
467 if_true=max_pool1d_with_indices,
468 if_false=_max_pool1d,
469 module_name=__name__,
470 func_name=
'max_pool1d')
def max_pool2d_with_indices(input, kernel_size, stride=None, padding=0, dilation=1,
                            ceil_mode=False, return_indices=False):
    r"""Applies a 2D max pooling over an input signal composed of several input
    planes.

    See :class:`~torch.nn.MaxPool2d` for details.

    Returns whatever ``torch._C._nn.max_pool2d_with_indices`` returns (the
    pooled values together with their indices). ``return_indices`` is accepted
    for signature compatibility with the dispatching wrapper and is not read
    here.
    """
    if stride is None:
        # An empty, explicitly typed list stands in for "no stride given".
        # NOTE(review): restored from a gap in the extracted source - confirm.
        stride = torch.jit.annotate(List[int], [])
    return torch._C._nn.max_pool2d_with_indices(input, kernel_size, stride, padding, dilation, ceil_mode)
def _max_pool2d(input, kernel_size, stride=None, padding=0, dilation=1,
                ceil_mode=False, return_indices=False):
    """Run 2D max pooling and return only the pooled tensor.

    Variant registered for ``return_indices=False``: delegates to
    :func:`max_pool2d_with_indices` and drops the indices by keeping element 0
    of its result. ``return_indices`` itself is not forwarded.
    """
    return max_pool2d_with_indices(
        input, kernel_size, stride, padding, dilation, ceil_mode)[0]
495 arg_name=
'return_indices',
498 if_true=max_pool2d_with_indices,
499 if_false=_max_pool2d,
500 module_name=__name__,
501 func_name=
'max_pool2d')
def max_pool3d_with_indices(input, kernel_size, stride=None, padding=0,
                            dilation=1, ceil_mode=False, return_indices=False):
    r"""Applies a 3D max pooling over an input signal composed of several input
    planes.

    See :class:`~torch.nn.MaxPool3d` for details.

    Returns whatever ``torch._C._nn.max_pool3d_with_indices`` returns (the
    pooled values together with their indices). ``return_indices`` is accepted
    for signature compatibility with the dispatching wrapper and is not read
    here.
    """
    if stride is None:
        # An empty, explicitly typed list stands in for "no stride given".
        # NOTE(review): restored from a gap in the extracted source - confirm.
        stride = torch.jit.annotate(List[int], [])
    return torch._C._nn.max_pool3d_with_indices(
        input, kernel_size, stride, padding, dilation, ceil_mode)
def _max_pool3d(input, kernel_size, stride=None, padding=0, dilation=1,
                ceil_mode=False, return_indices=False):
    """Run 3D max pooling and return only the pooled tensor.

    Variant registered for ``return_indices=False``: delegates to
    :func:`max_pool3d_with_indices` and drops the indices by keeping element 0
    of its result. ``return_indices`` itself is not forwarded.
    """
    return max_pool3d_with_indices(
        input, kernel_size, stride, padding, dilation, ceil_mode)[0]
527 arg_name=
'return_indices',
530 if_true=max_pool3d_with_indices,
531 if_false=_max_pool3d,
532 module_name=__name__,
533 func_name=
'max_pool3d')
537 def _unpool_output_size(input, kernel_size, stride, padding, output_size):
539 input_size = input.size()
541 for d
in range(len(kernel_size)):
542 default_size.append((input_size[d + 2] - 1) * stride[d] +
543 kernel_size[d] - 2 * padding[d])
544 if output_size
is None:
547 if len(output_size) == len(kernel_size) + 2:
548 output_size = output_size[2:]
549 if len(output_size) != len(kernel_size):
550 raise ValueError(
"output_size should be a sequence containing " 551 "{} or {} elements, but it has a length of '{}'" 552 .format(len(kernel_size), len(kernel_size) + 2,
554 for d
in range(len(kernel_size)):
555 min_size = default_size[d] - stride[d]
556 max_size = default_size[d] + stride[d]
557 if not (min_size < output_size[d] < max_size):
559 'invalid output_size "{}" (dim {} must be between {} and {})' 560 .format(output_size, d, min_size, max_size))
def max_unpool1d(input, indices, kernel_size, stride=None, padding=0,
                 output_size=None):
    r"""Computes a partial inverse of :class:`MaxPool1d`.

    See :class:`~torch.nn.MaxUnpool1d` for details.

    The 1D case is carried out by the 2D unpooling kernel: a trailing
    dimension of size 1 is appended to ``input``, ``indices`` and the resolved
    output size, and removed again from the result.
    """
    kernel_size = _single(kernel_size)
    if stride is not None:
        _stride = _single(stride)
    else:
        # Stride falls back to the kernel size when it is not given.
        _stride = kernel_size
    padding = _single(padding)
    output_size = _unpool_output_size(input, kernel_size, _stride, padding,
                                      output_size)
    # The helper may hand back a list or another sequence type; extend it with
    # a matching container so the concatenation is valid.
    if isinstance(output_size, list):
        output_size = output_size + [1]
    else:
        output_size = output_size + (1,)
    return torch._C._nn.max_unpool2d(input.unsqueeze(3), indices.unsqueeze(3),
                                     output_size).squeeze(3)
def max_unpool2d(input, indices, kernel_size, stride=None, padding=0,
                 output_size=None):
    r"""Computes a partial inverse of :class:`MaxPool2d`.

    See :class:`~torch.nn.MaxUnpool2d` for details.

    ``kernel_size``, ``stride`` and ``padding`` are expanded to pairs and used
    only to resolve and validate ``output_size``; the unpooling call itself
    receives ``input``, ``indices`` and the resolved size.
    """
    kernel_size = _pair(kernel_size)
    if stride is not None:
        _stride = _pair(stride)
    else:
        # Stride falls back to the kernel size when it is not given.
        _stride = kernel_size
    padding = _pair(padding)
    output_size = _unpool_output_size(input, kernel_size, _stride, padding,
                                      output_size)
    return torch._C._nn.max_unpool2d(input, indices, output_size)
def max_unpool3d(input, indices, kernel_size, stride=None, padding=0,
                 output_size=None):
    r"""Computes a partial inverse of :class:`MaxPool3d`.

    See :class:`~torch.nn.MaxUnpool3d` for details.

    ``kernel_size``, ``stride`` and ``padding`` are expanded to triples. Unlike
    the 2D variant, the resolved stride and padding are also passed on to the
    underlying unpooling call.
    """
    kernel_size = _triple(kernel_size)
    if stride is not None:
        _stride = _triple(stride)
    else:
        # Stride falls back to the kernel size when it is not given.
        _stride = kernel_size
    padding = _triple(padding)
    output_size = _unpool_output_size(input, kernel_size, _stride, padding,
                                      output_size)
    return torch._C._nn.max_unpool3d(
        input, indices, output_size, _stride, padding)
def lp_pool2d(input, norm_type, kernel_size, stride=None, ceil_mode=False):
    r"""Applies a 2D power-average pooling over an input signal composed of
    several input planes. If the sum of all inputs to the power of `p` is
    zero, the gradient is set to zero as well.

    See :class:`~torch.nn.LPPool2d` for details.

    Computed as an average pool of ``input ** norm_type``, scaled by the
    window area ``kw * kh`` and raised to the power ``1 / norm_type``.
    """
    kw, kh = utils._pair(kernel_size)
    if stride is not None:
        out = avg_pool2d(input.pow(norm_type), kernel_size, stride, 0, ceil_mode)
    else:
        # Leave stride unspecified so avg_pool2d applies its own default.
        out = avg_pool2d(input.pow(norm_type), kernel_size, padding=0, ceil_mode=ceil_mode)

    return (torch.sign(out) * relu(torch.abs(out))).mul(kw * kh).pow(1. / norm_type)
def lp_pool1d(input, norm_type, kernel_size, stride=None, ceil_mode=False):
    r"""Applies a 1D power-average pooling over an input signal composed of
    several input planes. If the sum of all inputs to the power of `p` is
    zero, the gradient is set to zero as well.

    See :class:`~torch.nn.LPPool1d` for details.

    Computed as an average pool of ``input ** norm_type``, scaled by
    ``kernel_size`` and raised to the power ``1 / norm_type``.
    """
    if stride is not None:
        out = avg_pool1d(input.pow(norm_type), kernel_size, stride, 0, ceil_mode)
    else:
        # Leave stride unspecified so avg_pool1d applies its own default.
        out = avg_pool1d(input.pow(norm_type), kernel_size, padding=0, ceil_mode=ceil_mode)

    return (torch.sign(out) * relu(torch.abs(out))).mul(kernel_size).pow(1. / norm_type)
def adaptive_max_pool1d_with_indices(input, output_size, return_indices=False):
    r"""Applies a 1D adaptive max pooling over an input signal composed of
    several input planes.

    See :class:`~torch.nn.AdaptiveMaxPool1d` for details and output shape.

    Args:
        output_size: the target output size (single integer)
        return_indices: whether to return pooling indices. Default: ``False``

    The result of :func:`torch.adaptive_max_pool1d` is returned unchanged;
    ``return_indices`` is not read inside this function.
    """
    return torch.adaptive_max_pool1d(input, output_size)
def _adaptive_max_pool1d(input, output_size, return_indices=False):
    """Run 1D adaptive max pooling and return only the pooled tensor.

    Variant registered for ``return_indices=False``: delegates to
    :func:`adaptive_max_pool1d_with_indices` and keeps element 0 of its result.
    """
    return adaptive_max_pool1d_with_indices(input, output_size)[0]
685 arg_name=
'return_indices',
688 if_true=adaptive_max_pool1d_with_indices,
689 if_false=_adaptive_max_pool1d,
690 module_name=__name__,
691 func_name=
'adaptive_max_pool1d')
def adaptive_max_pool2d_with_indices(input, output_size, return_indices=False):
    r"""Applies a 2D adaptive max pooling over an input signal composed of
    several input planes.

    See :class:`~torch.nn.AdaptiveMaxPool2d` for details and output shape.

    Args:
        output_size: the target output size (single integer or
            double-integer tuple)
        return_indices: whether to return pooling indices. Default: ``False``

    ``output_size`` is first normalised against the input's shape with
    ``_list_with_default``; ``return_indices`` is not read inside this
    function.
    """
    output_size = _list_with_default(output_size, input.size())
    return torch._C._nn.adaptive_max_pool2d(input, output_size)
def _adaptive_max_pool2d(input, output_size, return_indices=False):
    """Run 2D adaptive max pooling and return only the pooled tensor.

    Variant registered for ``return_indices=False``: delegates to
    :func:`adaptive_max_pool2d_with_indices` and keeps element 0 of its result.
    """
    return adaptive_max_pool2d_with_indices(input, output_size)[0]
717 arg_name=
'return_indices',
720 if_true=adaptive_max_pool2d_with_indices,
721 if_false=_adaptive_max_pool2d,
722 module_name=__name__,
723 func_name=
'adaptive_max_pool2d')
def adaptive_max_pool3d_with_indices(input, output_size, return_indices=False):
    r"""Applies a 3D adaptive max pooling over an input signal composed of
    several input planes.

    See :class:`~torch.nn.AdaptiveMaxPool3d` for details and output shape.

    Args:
        output_size: the target output size (single integer or
            triple-integer tuple)
        return_indices: whether to return pooling indices. Default: ``False``

    ``output_size`` is first normalised against the input's shape with
    ``_list_with_default``; ``return_indices`` is not read inside this
    function.
    """
    output_size = _list_with_default(output_size, input.size())
    return torch._C._nn.adaptive_max_pool3d(input, output_size)
def _adaptive_max_pool3d(input, output_size, return_indices=False):
    """Run 3D adaptive max pooling and return only the pooled tensor.

    Variant registered for ``return_indices=False``: delegates to
    :func:`adaptive_max_pool3d_with_indices` and keeps element 0 of its result.
    """
    return adaptive_max_pool3d_with_indices(input, output_size)[0]
749 arg_name=
'return_indices',
752 if_true=adaptive_max_pool3d_with_indices,
753 if_false=_adaptive_max_pool3d,
754 module_name=__name__,
755 func_name=
'adaptive_max_pool3d')
adaptive_avg_pool1d = _add_docstr(torch.adaptive_avg_pool1d, r"""
adaptive_avg_pool1d(input, output_size) -> Tensor

Applies a 1D adaptive average pooling over an input signal composed of
several input planes.

See :class:`~torch.nn.AdaptiveAvgPool1d` for details and output shape.

Args:
    output_size: the target output size (single integer)
""")


def adaptive_avg_pool2d(input, output_size):
    r"""
    Applies a 2D adaptive average pooling over an input signal composed of
    several input planes.

    See :class:`~torch.nn.AdaptiveAvgPool2d` for details and output shape.

    Args:
        output_size: the target output size (single integer or
            double-integer tuple)
    """
    # Normalise output_size against the input's shape before the native call.
    _output_size = _list_with_default(output_size, input.size())
    return torch._C._nn.adaptive_avg_pool2d(input, _output_size)
def adaptive_avg_pool3d(input, output_size):
    r"""
    Applies a 3D adaptive average pooling over an input signal composed of
    several input planes.

    See :class:`~torch.nn.AdaptiveAvgPool3d` for details and output shape.

    Args:
        output_size: the target output size (single integer or
            triple-integer tuple)
    """
    # Normalise output_size against the input's shape before the native call.
    _output_size = _list_with_default(output_size, input.size())
    return torch._C._nn.adaptive_avg_pool3d(input, _output_size)
def dropout(input, p=0.5, training=True, inplace=False):
    r"""
    During training, randomly zeroes some of the elements of the input
    tensor with probability :attr:`p` using samples from a Bernoulli
    distribution.

    See :class:`~torch.nn.Dropout` for details.

    Args:
        p: probability of an element to be zeroed. Default: 0.5
        training: apply dropout if is ``True``. Default: ``True``
        inplace: If set to ``True``, will do this operation in-place. Default: ``False``

    Raises:
        ValueError: if ``p`` is outside the closed interval [0, 1].
    """
    if p < 0. or p > 1.:
        raise ValueError("dropout probability has to be between 0 and 1, "
                         "but got {}".format(p))
    return (_VF.dropout_(input, p, training)
            if inplace
            else _VF.dropout(input, p, training))
def alpha_dropout(input, p=0.5, training=False, inplace=False):
    r"""Applies alpha dropout to the input.

    See :class:`~torch.nn.AlphaDropout` for details.

    Note that ``training`` defaults to ``False`` here.

    Raises:
        ValueError: if ``p`` is outside the closed interval [0, 1].
    """
    if p < 0. or p > 1.:
        raise ValueError("dropout probability has to be between 0 and 1, "
                         "but got {}".format(p))
    return (_VF.alpha_dropout_(input, p, training)
            if inplace
            else _VF.alpha_dropout(input, p, training))
def dropout2d(input, p=0.5, training=True, inplace=False):
    r"""
    Randomly zero out entire channels (a channel is a 2D feature map,
    e.g., the :math:`j`-th channel of the :math:`i`-th sample in the
    batched input is a 2D tensor :math:`\text{input}[i, j]`) of the input tensor).
    Each channel will be zeroed out independently on every forward call with
    probability :attr:`p` using samples from a Bernoulli distribution.

    See :class:`~torch.nn.Dropout2d` for details.

    Args:
        p: probability of a channel to be zeroed. Default: 0.5
        training: apply dropout if is ``True``. Default: ``True``
        inplace: If set to ``True``, will do this operation in-place. Default: ``False``

    Raises:
        ValueError: if ``p`` is outside the closed interval [0, 1].
    """
    if p < 0. or p > 1.:
        raise ValueError("dropout probability has to be between 0 and 1, "
                         "but got {}".format(p))
    return (_VF.feature_dropout_(input, p, training)
            if inplace
            else _VF.feature_dropout(input, p, training))
def dropout3d(input, p=0.5, training=True, inplace=False):
    r"""
    Randomly zero out entire channels (a channel is a 3D feature map,
    e.g., the :math:`j`-th channel of the :math:`i`-th sample in the
    batched input is a 3D tensor :math:`\text{input}[i, j]`) of the input tensor).
    Each channel will be zeroed out independently on every forward call with
    probability :attr:`p` using samples from a Bernoulli distribution.

    See :class:`~torch.nn.Dropout3d` for details.

    Args:
        p: probability of a channel to be zeroed. Default: 0.5
        training: apply dropout if is ``True``. Default: ``True``
        inplace: If set to ``True``, will do this operation in-place. Default: ``False``

    Raises:
        ValueError: if ``p`` is outside the closed interval [0, 1].
    """
    # Same validation and the same feature_dropout calls as dropout2d; kept as
    # a separate function body rather than delegating.
    if p < 0. or p > 1.:
        raise ValueError("dropout probability has to be between 0 and 1, "
                         "but got {}".format(p))
    return (_VF.feature_dropout_(input, p, training)
            if inplace
            else _VF.feature_dropout(input, p, training))
def feature_alpha_dropout(input, p=0.5, training=False, inplace=False):
    """Validate ``p`` and apply the feature-wise alpha dropout operator.

    Args:
        input: tensor passed through to ``_VF.feature_alpha_dropout`` (or its
            in-place counterpart).
        p: dropout probability; must lie in [0, 1]. Default: 0.5
        training: forwarded unchanged to the operator. Default: ``False``
        inplace: if ``True``, call the in-place operator. Default: ``False``

    Raises:
        ValueError: if ``p`` is outside the closed interval [0, 1].
    """
    if p < 0. or p > 1.:
        raise ValueError("dropout probability has to be between 0 and 1, "
                         "but got {}".format(p))
    return (_VF.feature_alpha_dropout_(input, p, training)
            if inplace
            else _VF.feature_alpha_dropout(input, p, training))
def threshold(input, threshold, value, inplace=False):
    r"""Thresholds each element of the input Tensor.

    See :class:`~torch.nn.Threshold` for more details.

    With ``inplace=True`` the in-place operator ``_VF.threshold_`` is used;
    otherwise ``_VF.threshold``. The operator's result is returned.
    """
    if inplace:
        result = _VF.threshold_(input, threshold, value)
    else:
        result = _VF.threshold(input, threshold, value)
    return result
threshold_ = _add_docstr(_VF.threshold_, r"""
threshold_(input, threshold, value) -> Tensor

In-place version of :func:`~threshold`.
""")


def relu(input, inplace=False):
    r"""relu(input, inplace=False) -> Tensor

    Applies the rectified linear unit function element-wise. See
    :class:`~torch.nn.ReLU` for more details.

    With ``inplace=True`` the in-place operator :func:`torch.relu_` is used;
    otherwise :func:`torch.relu`.
    """
    if inplace:
        result = torch.relu_(input)
    else:
        result = torch.relu(input)
    return result
relu_ = _add_docstr(torch.relu_, r"""
relu_(input) -> Tensor

In-place version of :func:`~relu`.
""")


def glu(input, dim=-1):
    r"""
    glu(input, dim=-1) -> Tensor

    The gated linear unit. Computes:

    .. math ::
        \text{GLU}(a, b) = a \otimes \sigma(b)

    where `input` is split in half along `dim` to form `a` and `b`, :math:`\sigma`
    is the sigmoid function and :math:`\otimes` is the element-wise product between matrices.

    See `Language Modeling with Gated Convolutional Networks <https://arxiv.org/abs/1612.08083>`_.

    Args:
        input (Tensor): input tensor
        dim (int): dimension on which to split the input. Default: -1

    Raises:
        RuntimeError: if ``input`` is zero-dimensional.
    """
    # A 0-dim tensor has no dimension to split in half.
    if input.dim() == 0:
        raise RuntimeError("glu does not support scalars because halving size must be even")
    return torch._C._nn.glu(input, dim)
def hardtanh(input, min_val=-1., max_val=1., inplace=False):
    r"""
    hardtanh(input, min_val=-1., max_val=1., inplace=False) -> Tensor

    Applies the HardTanh function element-wise. See :class:`~torch.nn.Hardtanh` for more
    details.

    With ``inplace=True`` the in-place kernel ``torch._C._nn.hardtanh_`` is used.
    """
    return (torch._C._nn.hardtanh_(input, min_val, max_val)
            if inplace
            else torch._C._nn.hardtanh(input, min_val, max_val))
hardtanh_ = _add_docstr(torch._C._nn.hardtanh_, r"""
hardtanh_(input, min_val=-1., max_val=1.) -> Tensor

In-place version of :func:`~hardtanh`.
""")


def relu6(input, inplace=False):
    r"""relu6(input, inplace=False) -> Tensor

    Applies the element-wise function :math:`\text{ReLU6}(x) = \min(\max(0,x), 6)`.

    See :class:`~torch.nn.ReLU6` for more details.
    """
    # ReLU6 is HardTanh clamped to the fixed range [0, 6].
    return hardtanh(input, min_val=0., max_val=6., inplace=inplace)
def elu(input, alpha=1., inplace=False):
    r"""Applies element-wise,
    :math:`\text{ELU}(x) = \max(0,x) + \min(0, \alpha * (\exp(x) - 1))`.

    See :class:`~torch.nn.ELU` for more details.

    With ``inplace=True`` the in-place kernel ``torch._C._nn.elu_`` is used.
    """
    return (torch._C._nn.elu_(input, alpha)
            if inplace
            else torch._C._nn.elu(input, alpha))
elu_ = _add_docstr(torch._C._nn.elu_, r"""
elu_(input, alpha=1.) -> Tensor

In-place version of :func:`~elu`.
""")


def selu(input, inplace=False):
    r"""selu(input, inplace=False) -> Tensor

    Applies element-wise,
    :math:`\text{SELU}(x) = scale * (\max(0,x) + \min(0, \alpha * (\exp(x) - 1)))`,
    with :math:`\alpha=1.6732632423543772848170429916717` and
    :math:`scale=1.0507009873554804934193349852946`.

    See :class:`~torch.nn.SELU` for more details.
    """
    return torch.selu_(input) if inplace else torch.selu(input)
selu_ = _add_docstr(torch.selu_, r"""
selu_(input) -> Tensor

In-place version of :func:`~selu`.
""")


def celu(input, alpha=1., inplace=False):
    r"""celu(input, alpha=1., inplace=False) -> Tensor

    Applies element-wise,
    :math:`\text{CELU}(x) = \max(0,x) + \min(0, \alpha * (\exp(x/\alpha) - 1))`.

    See :class:`~torch.nn.CELU` for more details.
    """
    return torch.celu_(input, alpha) if inplace else torch.celu(input, alpha)
celu_ = _add_docstr(torch.celu_, r"""
celu_(input, alpha=1.) -> Tensor

In-place version of :func:`~celu`.
""")


def leaky_relu(input, negative_slope=0.01, inplace=False):
    r"""
    leaky_relu(input, negative_slope=0.01, inplace=False) -> Tensor

    Applies element-wise,
    :math:`\text{LeakyReLU}(x) = \max(0, x) + \text{negative\_slope} * \min(0, x)`

    See :class:`~torch.nn.LeakyReLU` for more details.
    """
    return (torch._C._nn.leaky_relu_(input, negative_slope)
            if inplace
            else torch._C._nn.leaky_relu(input, negative_slope))
leaky_relu_ = _add_docstr(torch._C._nn.leaky_relu_, r"""
leaky_relu_(input, negative_slope=0.01) -> Tensor

In-place version of :func:`~leaky_relu`.
""")


def prelu(input, weight):
    r"""prelu(input, weight) -> Tensor

    Applies element-wise the function
    :math:`\text{PReLU}(x) = \max(0,x) + \text{weight} * \min(0,x)` where weight is a
    learnable parameter.

    See :class:`~torch.nn.PReLU` for more details.
    """
    activated = torch.prelu(input, weight)
    return activated
def rrelu(input, lower=1. / 8, upper=1. / 3, training=False, inplace=False):
    r"""rrelu(input, lower=1./8, upper=1./3, training=False, inplace=False) -> Tensor

    Randomized leaky ReLU.

    See :class:`~torch.nn.RReLU` for more details.

    With ``inplace=True`` the call is routed to ``torch.rrelu_``.
    """
    return (torch.rrelu_(input, lower, upper, training)
            if inplace
            else torch.rrelu(input, lower, upper, training))
rrelu_ = _add_docstr(torch.rrelu_, r"""
rrelu_(input, lower=1./8, upper=1./3, training=False) -> Tensor

In-place version of :func:`~rrelu`.
""")

logsigmoid = _add_docstr(torch._C._nn.log_sigmoid, r"""
logsigmoid(input) -> Tensor

Applies element-wise :math:`\text{LogSigmoid}(x_i) = \log \left(\frac{1}{1 + \exp(-x_i)}\right)`

See :class:`~torch.nn.LogSigmoid` for more details.
""")


def hardshrink(input, lambd=0.5):
    r"""
    hardshrink(input, lambd=0.5) -> Tensor

    Applies the hard shrinkage function element-wise

    See :class:`~torch.nn.Hardshrink` for more details.
    """
    shrunk = torch.hardshrink(input, lambd)
    return shrunk
def tanhshrink(input):
    r"""tanhshrink(input) -> Tensor

    Applies element-wise, :math:`\text{Tanhshrink}(x) = x - \text{Tanh}(x)`

    See :class:`~torch.nn.Tanhshrink` for more details.
    """
    squashed = input.tanh()
    return input - squashed
def softsign(input):
    r"""softsign(input) -> Tensor

    Applies element-wise, the function :math:`\text{SoftSign}(x) = \frac{x}{1 + |x|}`

    See :class:`~torch.nn.Softsign` for more details.
    """
    denominator = input.abs() + 1
    return input / denominator
softplus = _add_docstr(torch._C._nn.softplus, r"""
softplus(input, beta=1, threshold=20) -> Tensor
""")


def _get_softmax_dim(name, ndim, stacklevel):
    """Pick the legacy implicit softmax dimension and warn that it is deprecated.

    Args:
        name: name of the calling function, interpolated into the warning.
        ndim: number of dimensions of the input tensor.
        stacklevel: forwarded to :func:`warnings.warn`.

    Returns:
        ``0`` when :attr:`ndim` is 0, 1 or 3, otherwise ``1``.
    """
    message = ("Implicit dimension choice for {} has been deprecated. "
               "Change the call to include dim=X as an argument.".format(name))
    warnings.warn(message, stacklevel=stacklevel)
    uses_first_dim = ndim == 0 or ndim == 1 or ndim == 3
    if uses_first_dim:
        ret = 0
    else:
        ret = 1
    return ret
def softmin(input, dim=None, _stacklevel=3, dtype=None):
    r"""Applies a softmin function.

    Note that :math:`\text{Softmin}(x) = \text{Softmax}(-x)`. See softmax definition for mathematical formula.

    See :class:`~torch.nn.Softmin` for more details.

    Arguments:
        input (Tensor): input
        dim (int): A dimension along which softmin will be computed (so every slice
            along dim will sum to 1).
        dtype (:class:`torch.dtype`, optional): the desired data type of returned tensor.
          If specified, the input tensor is casted to :attr:`dtype` before the operation
          is performed. This is useful for preventing data type overflows. Default: None.
    """
    if dim is None:
        # Legacy path: pick the implicit dimension (emits a deprecation warning).
        dim = _get_softmax_dim('softmin', input.dim(), _stacklevel)
    negated = -input
    if dtype is not None:
        ret = negated.softmax(dim, dtype=dtype)
    else:
        ret = negated.softmax(dim)
    return ret
def softmax(input, dim=None, _stacklevel=3, dtype=None):
    r"""Applies a softmax function.

    Softmax is defined as:

    :math:`\text{Softmax}(x_{i}) = \frac{exp(x_i)}{\sum_j exp(x_j)}`

    It is applied to all slices along dim, and will re-scale them so that the elements
    lie in the range `[0, 1]` and sum to 1.

    See :class:`~torch.nn.Softmax` for more details.

    Arguments:
        input (Tensor): input
        dim (int): A dimension along which softmax will be computed.
        dtype (:class:`torch.dtype`, optional): the desired data type of returned tensor.
          If specified, the input tensor is casted to :attr:`dtype` before the operation
          is performed. This is useful for preventing data type overflows. Default: None.

    .. note::
        This function doesn't work directly with NLLLoss,
        which expects the Log to be computed between the Softmax and itself.
        Use log_softmax instead (it's faster and has better numerical properties).

    """
    if dim is None:
        # Legacy path: pick the implicit dimension (emits a deprecation warning).
        dim = _get_softmax_dim('softmax', input.dim(), _stacklevel)
    if dtype is not None:
        ret = input.softmax(dim, dtype=dtype)
    else:
        ret = input.softmax(dim)
    return ret
def gumbel_softmax(logits, tau=1, hard=False, eps=1e-10, dim=-1):
    r"""
    Samples from the `Gumbel-Softmax distribution`_ and optionally discretizes.

    Args:
      logits: `[..., num_features]` unnormalized log probabilities
      tau: non-negative scalar temperature
      hard: if ``True``, the returned samples will be discretized as one-hot vectors,
            but will be differentiated as if it is the soft sample in autograd
      eps: deprecated; a warning is issued when a non-default value is passed
            and the value is otherwise unused.
      dim (int): A dimension along which softmax will be computed. Default: -1.

    Returns:
      Sampled tensor of same shape as `logits` from the Gumbel-Softmax distribution.
      If ``hard=True``, the returned samples will be one-hot, otherwise they will
      be probability distributions that sum to 1 across `dim`.

    .. note::
      This function is here for legacy reasons, may be removed from nn.Functional in the future.

    .. note::
      The main trick for `hard` is to do  `y_hard - y_soft.detach() + y_soft`

      It achieves two things:
      - makes the output value exactly one-hot
      (since we add then subtract y_soft value)
      - makes the gradient equal to y_soft gradient
      (since we strip all other gradients)

    Examples::
        >>> logits = torch.randn(20, 32)
        >>> # Sample soft categorical using reparametrization trick:
        >>> F.gumbel_softmax(logits, tau=1, hard=False)
        >>> # Sample hard categorical using "Straight-through" trick:
        >>> F.gumbel_softmax(logits, tau=1, hard=True)

    .. _Gumbel-Softmax distribution:
        https://arxiv.org/abs/1611.00712
        https://arxiv.org/abs/1611.01144
    """
    if eps != 1e-10:
        warnings.warn("`eps` parameter is deprecated and has no effect.")

    # Negative log of exponential samples, added to the logits as noise.
    noise = -torch.empty_like(logits).exponential_().log()
    perturbed = (logits + noise) / tau
    y_soft = perturbed.softmax(dim)

    if hard:
        # One-hot at the arg-max position; gradient flows through y_soft only.
        _, index = y_soft.max(dim, keepdim=True)
        one_hot = torch.zeros_like(logits).scatter_(dim, index, 1.0)
        ret = one_hot - y_soft.detach() + y_soft
    else:
        ret = y_soft
    return ret
def log_softmax(input, dim=None, _stacklevel=3, dtype=None):
    r"""Applies a softmax followed by a logarithm.

    While mathematically equivalent to log(softmax(x)), doing these two
    operations separately is slower, and numerically unstable. This function
    uses an alternative formulation to compute the output and gradient correctly.

    See :class:`~torch.nn.LogSoftmax` for more details.

    Arguments:
        input (Tensor): input
        dim (int): A dimension along which log_softmax will be computed.
        dtype (:class:`torch.dtype`, optional): the desired data type of returned tensor.
          If specified, the input tensor is casted to :attr:`dtype` before the operation
          is performed. This is useful for preventing data type overflows. Default: None.
    """
    if dim is None:
        # Legacy path: pick the implicit dimension (emits a deprecation warning).
        dim = _get_softmax_dim('log_softmax', input.dim(), _stacklevel)
    if dtype is not None:
        ret = input.log_softmax(dim, dtype=dtype)
    else:
        ret = input.log_softmax(dim)
    return ret
softshrink = _add_docstr(torch._C._nn.softshrink, r"""
softshrink(input, lambd=0.5) -> Tensor

Applies the soft shrinkage function elementwise

See :class:`~torch.nn.Softshrink` for more details.
""")


def tanh(input):
    r"""tanh(input) -> Tensor

    Applies element-wise,
    :math:`\text{Tanh}(x) = \tanh(x) = \frac{\exp(x) - \exp(-x)}{\exp(x) + \exp(-x)}`

    See :class:`~torch.nn.Tanh` for more details.
    """
    warnings.warn("nn.functional.tanh is deprecated. Use torch.tanh instead.")
    squashed = input.tanh()
    return squashed


def sigmoid(input):
    r"""sigmoid(input) -> Tensor

    Applies the element-wise function :math:`\text{Sigmoid}(x) = \frac{1}{1 + \exp(-x)}`

    See :class:`~torch.nn.Sigmoid` for more details.
    """
    warnings.warn("nn.functional.sigmoid is deprecated. Use torch.sigmoid instead.")
    squashed = input.sigmoid()
    return squashed
def linear(input, weight, bias=None):
    r"""
    Applies a linear transformation to the incoming data: :math:`y = xA^T + b`.

    Shape:

        - Input: :math:`(N, *, in\_features)` where `*` means any number of
          additional dimensions
        - Weight: :math:`(out\_features, in\_features)`
        - Bias: :math:`(out\_features)`
        - Output: :math:`(N, *, out\_features)`
    """
    transposed = weight.t()
    if bias is not None and input.dim() == 2:
        # 2-D input with a bias: a single fused addmm call.
        ret = torch.addmm(bias, input, transposed)
    else:
        ret = input.matmul(transposed)
        if bias is not None:
            ret += bias
    return ret
def bilinear(input1, input2, weight, bias=None):
    r"""Thin wrapper that forwards all four arguments to :func:`torch.bilinear`.

    Args:
        input1: first input tensor
        input2: second input tensor
        weight: weight tensor
        bias: optional bias tensor. Default: ``None``
    """
    result = torch.bilinear(input1, input2, weight, bias)
    return result
1417 def _no_grad_embedding_renorm_(weight, input, max_norm, norm_type):
1419 with torch.no_grad():
1420 return torch.embedding_renorm_(weight, input, max_norm, norm_type)
def embedding(input, weight, padding_idx=None, max_norm=None, norm_type=2.,
              scale_grad_by_freq=False, sparse=False):
    r"""A simple lookup table that looks up embeddings in a fixed dictionary and size.

    This module is often used to retrieve word embeddings using indices.
    The input to the module is a list of indices, and the embedding matrix,
    and the output is the corresponding word embeddings.

    See :class:`torch.nn.Embedding` for more details.

    Args:
        input (LongTensor): Tensor containing indices into the embedding matrix
        weight (Tensor): The embedding matrix with number of rows equal to the maximum possible index + 1,
            and number of columns equal to the embedding size
        padding_idx (int, optional): If given, pads the output with the embedding vector at :attr:`padding_idx`
                                         (initialized to zeros) whenever it encounters the index.
        max_norm (float, optional): If given, each embedding vector with norm larger than :attr:`max_norm`
                                    is renormalized to have norm :attr:`max_norm`.
                                    Note: this will modify :attr:`weight` in-place.
        norm_type (float, optional): The p of the p-norm to compute for the :attr:`max_norm` option. Default ``2``.
        scale_grad_by_freq (boolean, optional): If given, this will scale gradients by the inverse of frequency of
                                                the words in the mini-batch. Default ``False``.
        sparse (bool, optional): If ``True``, gradient w.r.t. :attr:`weight` will be a sparse tensor. See Notes under
                                 :class:`torch.nn.Embedding` for more details regarding sparse gradients.

    Shape:
        - Input: LongTensor of arbitrary shape containing the indices to extract
        - Weight: Embedding matrix of floating point type with shape `(V, embedding_dim)`,
                            where V = maximum index + 1 and embedding_dim = the embedding size
        - Output: `(*, embedding_dim)`, where `*` is the input shape

    Examples::

        >>> # a batch of 2 samples of 4 indices each
        >>> input = torch.tensor([[1,2,4,5],[4,3,2,9]])
        >>> # an embedding matrix containing 10 tensors of size 3
        >>> embedding_matrix = torch.rand(10, 3)
        >>> F.embedding(input, embedding_matrix)
        tensor([[[ 0.8490,  0.9625,  0.6753],
                 [ 0.9666,  0.7761,  0.6108],
                 [ 0.6246,  0.9751,  0.3618],
                 [ 0.4161,  0.2419,  0.7383]],

                [[ 0.6246,  0.9751,  0.3618],
                 [ 0.0237,  0.7794,  0.0528],
                 [ 0.9666,  0.7761,  0.6108],
                 [ 0.3385,  0.8612,  0.1867]]])

        >>> # example with padding_idx
        >>> weights = torch.rand(10, 3)
        >>> weights[0, :].zero_()
        >>> embedding_matrix = weights
        >>> input = torch.tensor([[0,2,0,5]])
        >>> F.embedding(input, embedding_matrix, padding_idx=0)
        tensor([[[ 0.0000,  0.0000,  0.0000],
                 [ 0.5609,  0.5384,  0.8720],
                 [ 0.0000,  0.0000,  0.0000],
                 [ 0.6262,  0.2438,  0.7471]]])
    """
    if padding_idx is not None:
        num_embeddings = weight.size(0)
        if padding_idx > 0:
            assert padding_idx < num_embeddings, 'Padding_idx must be within num_embeddings'
        elif padding_idx < 0:
            assert padding_idx >= -num_embeddings, 'Padding_idx must be within num_embeddings'
            # Translate a negative index into its non-negative equivalent.
            padding_idx = num_embeddings + padding_idx
    else:
        # -1 is what gets passed to torch.embedding when no padding index is given.
        padding_idx = -1
    if max_norm is not None:
        # Make the indices contiguous once; the same tensor is reused for
        # both the renorm and the lookup below.
        input = input.contiguous()
        # Renormalisation mutates `weight` in-place with autograd disabled.
        _no_grad_embedding_renorm_(weight, input, max_norm, norm_type)
    return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
def embedding_bag(input, weight, offsets=None, max_norm=None, norm_type=2,
                  scale_grad_by_freq=False, mode='mean', sparse=False):
    r"""Computes sums, means or maxes of `bags` of embeddings, without instantiating the
    intermediate embeddings.

    See :class:`torch.nn.EmbeddingBag` for more details.

    .. include:: cuda_deterministic_backward.rst

    Args:
        input (LongTensor): Tensor containing bags of indices into the embedding matrix
        weight (Tensor): The embedding matrix with number of rows equal to the maximum possible index + 1,
            and number of columns equal to the embedding size
        offsets (LongTensor, optional): Only used when :attr:`input` is 1D. :attr:`offsets` determines
                             the starting index position of each bag (sequence) in :attr:`input`.
        max_norm (float, optional): If given, each embedding vector with norm larger than :attr:`max_norm`
                                    is renormalized to have norm :attr:`max_norm`.
                                    Note: this will modify :attr:`weight` in-place.
        norm_type (float, optional): The ``p`` in the ``p``-norm to compute for the :attr:`max_norm` option.
                                     Default ``2``.
        scale_grad_by_freq (boolean, optional): if given, this will scale gradients by the inverse of frequency of
                                                the words in the mini-batch. Default ``False``.
                                                Note: this option is not supported when ``mode="max"``.
        mode (string, optional): ``"sum"``, ``"mean"`` or ``"max"``. Specifies the way to reduce the bag.
                                 Default: ``"mean"``
        sparse (bool, optional): if ``True``, gradient w.r.t. :attr:`weight` will be a sparse tensor. See Notes under
                                 :class:`torch.nn.Embedding` for more details regarding sparse gradients.
                                 Note: this option is not supported when ``mode="max"``.

    Shape:

        - :attr:`input` (LongTensor) and :attr:`offsets` (LongTensor, optional)

          - If :attr:`input` is 2D of shape `(B, N)`,

            it will be treated as ``B`` bags (sequences) each of fixed length ``N``, and
            this will return ``B`` values aggregated in a way depending on the :attr:`mode`.
            :attr:`offsets` is ignored and required to be ``None`` in this case.

          - If :attr:`input` is 1D of shape `(N)`,

            it will be treated as a concatenation of multiple bags (sequences).
            :attr:`offsets` is required to be a 1D tensor containing the
            starting index positions of each bag in :attr:`input`. Therefore,
            for :attr:`offsets` of shape `(B)`, :attr:`input` will be viewed as
            having ``B`` bags. Empty bags (i.e., having 0-length) will have
            returned vectors filled by zeros.

        - :attr:`weight` (Tensor): the learnable weights of the module of
          shape `(num_embeddings, embedding_dim)`

        - :attr:`output`: aggregated embedding values of shape `(B, embedding_dim)`

    Raises:
        ValueError: if :attr:`input` is 2D and :attr:`offsets` is given, if
            :attr:`input` is 1D and :attr:`offsets` is missing, not 1D, does
            not start at 0 or points past the end of :attr:`input`, if
            :attr:`input` is neither 1D nor 2D, if :attr:`mode` is unknown, or
            if ``mode="max"`` is combined with :attr:`scale_grad_by_freq` or
            :attr:`sparse`.

    Examples::

        >>> # an Embedding module containing 10 tensors of size 3
        >>> embedding_matrix = torch.rand(10, 3)
        >>> # a batch of 2 samples of 4 indices each
        >>> input = torch.tensor([1,2,4,5,4,3,2,9])
        >>> offsets = torch.tensor([0,4])
        >>> F.embedding_bag(input, embedding_matrix, offsets)
        tensor([[ 0.3397,  0.3552,  0.5545],
                [ 0.5893,  0.4386,  0.5882]])
    """
    # Backward compatibility: the signature used to be
    # embedding_bag(weight, input, ...). Detect the old order from the dtypes
    # and swap the arguments back.
    if weight.dtype == torch.long and input.is_floating_point():
        warnings.warn("Argument order of nn.functional.embedding_bag was changed. "
                      "Usage `embedding_bag(weight, input, ...)` is deprecated, "
                      "and should now be `embedding_bag(input, weight, ...)`.")
        weight, input = input, weight

    if input.dim() == 2:
        if offsets is not None:
            raise ValueError("if input is 2D, then offsets has to be None"
                             ", as input is treated as a mini-batch of"
                             " fixed length sequences. However, found "
                             "offsets of type {}".format(type(offsets)))
        else:
            # Every row is one bag, so bags start at multiples of the row length.
            offsets = torch.arange(0, input.numel(), input.size(1),
                                   dtype=torch.long, device=input.device)

            input = input.reshape(-1)
    elif input.dim() == 1:
        if offsets is None:
            raise ValueError("offsets has to be a 1D Tensor but got None")
        else:
            if offsets.dim() != 1:
                raise ValueError("offsets has to be a 1D Tensor")
            if int(offsets[0]) != 0:
                raise ValueError("offsets[0] has to be 0, i.e., the first sequence "
                                 "in the mini-batch has to start from position 0. "
                                 "However, got {}".format(offsets[0].item()))
            if int(offsets[-1]) > input.size(0):
                raise ValueError("offsets[-1] can not be greater than input's length"
                                 " ({}), but got offsets[-1] of {}"
                                 .format(input.size(0), offsets[-1].item()))
    else:
        raise ValueError("input has to be 1D or 2D Tensor,"
                         " but got Tensor of dimension {}".format(input.dim()))

    # Integer code for the reduction mode, as passed to torch.embedding_bag.
    if mode == 'sum':
        mode_enum = 0
    elif mode == 'mean':
        mode_enum = 1
    elif mode == 'max':
        mode_enum = 2

        if scale_grad_by_freq:
            raise ValueError("max mode does not support scaling the gradient by the frequency")

        if sparse:
            raise ValueError("max mode does not support sparse weights")

    else:
        mode_enum = -1
        raise ValueError("mode has to be one of sum, mean or max")

    if max_norm is not None:
        # Renormalisation mutates `weight` in-place with autograd disabled.
        _no_grad_embedding_renorm_(weight, input, max_norm, norm_type)

    ret, _, _, _ = torch.embedding_bag(
        weight,
        input,
        offsets,
        scale_grad_by_freq,
        mode_enum,
        sparse)
    return ret
def batch_norm(input, running_mean, running_var, weight=None, bias=None,
               training=False, momentum=0.1, eps=1e-5):
    r"""Applies Batch Normalization for each channel across a batch of data.

    See :class:`~torch.nn.BatchNorm1d`, :class:`~torch.nn.BatchNorm2d`,
    :class:`~torch.nn.BatchNorm3d` for details.

    Raises:
        ValueError: when :attr:`training` is set and the input holds a single
            value per channel (the product of all sizes except dim 1 is 1).
    """
    if training:
        size = input.size()
        # Multiply the batch size with every size after the channel dim,
        # i.e. count the values available per channel.
        values_per_channel = size[0]
        for offset in range(len(size) - 2):
            values_per_channel *= size[offset + 2]
        if values_per_channel == 1:
            raise ValueError('Expected more than 1 value per channel when training, got input size {}'.format(size))

    cudnn_enabled = torch.backends.cudnn.enabled
    return torch.batch_norm(
        input, weight, bias, running_mean, running_var,
        training, momentum, eps, cudnn_enabled
    )
def instance_norm(input, running_mean=None, running_var=None, weight=None,
                  bias=None, use_input_stats=True, momentum=0.1, eps=1e-5):
    r"""Applies Instance Normalization for each channel in each data sample in a
    batch.

    See :class:`~torch.nn.InstanceNorm1d`, :class:`~torch.nn.InstanceNorm2d`,
    :class:`~torch.nn.InstanceNorm3d` for details.
    """
    cudnn_enabled = torch.backends.cudnn.enabled
    return torch.instance_norm(
        input, weight, bias, running_mean, running_var,
        use_input_stats, momentum, eps, cudnn_enabled
    )
def layer_norm(input, normalized_shape, weight=None, bias=None, eps=1e-5):
    r"""Applies Layer Normalization for last certain number of dimensions.

    See :class:`~torch.nn.LayerNorm` for details.
    """
    cudnn_enabled = torch.backends.cudnn.enabled
    return torch.layer_norm(input, normalized_shape, weight, bias, eps,
                            cudnn_enabled)
def group_norm(input, num_groups, weight=None, bias=None, eps=1e-5):
    r"""Applies Group Normalization to the input using :attr:`num_groups` groups.

    See :class:`~torch.nn.GroupNorm` for details.
    """
    cudnn_enabled = torch.backends.cudnn.enabled
    return torch.group_norm(input, num_groups, weight, bias, eps,
                            cudnn_enabled)
def local_response_norm(input, size, alpha=1e-4, beta=0.75, k=1.):
    r"""Applies local response normalization over an input signal composed of
    several input planes, where channels occupy the second dimension.
    Applies normalization across channels.

    See :class:`~torch.nn.LocalResponseNorm` for details.

    Args:
        input: input tensor with at least 3 dimensions
        size: number of neighbouring channels averaged over
        alpha: multiplicative factor applied to the averaged squares. Default: 1e-4
        beta: exponent applied to the scaled average. Default: 0.75
        k: additive constant. Default: 1.

    Returns:
        :attr:`input` divided element-wise by the normalization term.

    Raises:
        ValueError: if :attr:`input` has fewer than 3 dimensions.
    """
    dim = input.dim()
    if dim < 3:
        # Adjacent string literals instead of a backslash continuation, which
        # would embed the source indentation in the message.
        raise ValueError('Expected 3D or higher dimensionality '
                         'input (got {} dimensions)'.format(dim))
    # Squared activations with an extra dim inserted at position 1, so that
    # the channel dim can be pooled over as a spatial dim.
    div = input.mul(input).unsqueeze(1)
    if dim == 3:
        # Pad the channel dim so the stride-1 average keeps its length.
        div = pad(div, (0, 0, size // 2, (size - 1) // 2))
        div = avg_pool2d(div, (size, 1), stride=1).squeeze(1)
    else:
        sizes = input.size()
        # Flatten everything after the first spatial dim so avg_pool3d applies.
        div = div.view(sizes[0], 1, sizes[1], sizes[2], -1)
        div = pad(div, (0, 0, 0, 0, size // 2, (size - 1) // 2))
        div = avg_pool3d(div, (size, 1, 1), stride=1).squeeze(1)
        div = div.view(sizes)
    div = div.mul(alpha).add(k).pow(beta)
    return input / div
def ctc_loss(log_probs, targets, input_lengths, target_lengths, blank=0,
             reduction='mean', zero_infinity=False):
    r"""The Connectionist Temporal Classification loss.

    See :class:`~torch.nn.CTCLoss` for details.

    .. include:: cudnn_deterministic.rst
    .. include:: cuda_deterministic_backward.rst

    Args:
        log_probs: :math:`(T, N, C)` where `C = number of characters in alphabet including blank`,
            `T = input length`, and `N = batch size`.
            The logarithmized probabilities of the outputs
            (e.g. obtained with :func:`torch.nn.functional.log_softmax`).
        targets: :math:`(N, S)` or `(sum(target_lengths))`.
            Targets cannot be blank. In the second form, the targets are assumed to be concatenated.
        input_lengths: :math:`(N)`.
            Lengths of the inputs (must each be :math:`\leq T`)
        target_lengths: :math:`(N)`.
            Lengths of the targets
        blank (int, optional):
            Blank label. Default :math:`0`.
        reduction (string, optional): Specifies the reduction to apply to the output:
            ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied,
            ``'mean'``: the output losses will be divided by the target lengths and
            then the mean over the batch is taken, ``'sum'``: the output will be
            summed. Default: ``'mean'``
        zero_infinity (bool, optional):
            Whether to zero infinite losses and the associated gradients.
            Default: ``False``
            Infinite losses mainly occur when the inputs are too short
            to be aligned to the targets.

    Example::

        >>> log_probs = torch.randn(50, 16, 20).log_softmax(2).detach().requires_grad_()
        >>> targets = torch.randint(1, 20, (16, 30), dtype=torch.long)
        >>> input_lengths = torch.full((16,), 50, dtype=torch.long)
        >>> target_lengths = torch.randint(10,30,(16,), dtype=torch.long)
        >>> loss = F.ctc_loss(log_probs, targets, input_lengths, target_lengths)
        >>> loss.backward()
    """
    reduction_enum = _Reduction.get_enum(reduction)
    return torch.ctc_loss(log_probs, targets, input_lengths, target_lengths,
                          blank, reduction_enum, zero_infinity)
def nll_loss(input, target, weight=None, size_average=None, ignore_index=-100,
             reduce=None, reduction='mean'):
    r"""The negative log likelihood loss.

    See :class:`~torch.nn.NLLLoss` for details.

    Args:
        input: :math:`(N, C)` where `C = number of classes` or :math:`(N, C, H, W)`
            in case of 2D Loss, or :math:`(N, C, d_1, d_2, ..., d_K)` where :math:`K \geq 1`
            in the case of K-dimensional loss.
        target: :math:`(N)` where each value is :math:`0 \leq \text{targets}[i] \leq C-1`,
            or :math:`(N, d_1, d_2, ..., d_K)` where :math:`K \geq 1` for
            K-dimensional loss.
        weight (Tensor, optional): a manual rescaling weight given to each
            class. If given, has to be a Tensor of size `C`
        size_average (bool, optional): Deprecated (see :attr:`reduction`). By default,
            the losses are averaged over each loss element in the batch. Note that for
            some losses, there multiple elements per sample. If the field :attr:`size_average`
            is set to ``False``, the losses are instead summed for each minibatch. Ignored
            when reduce is ``False``. Default: ``True``
        ignore_index (int, optional): Specifies a target value that is ignored
            and does not contribute to the input gradient. When :attr:`size_average` is
            ``True``, the loss is averaged over non-ignored targets. Default: -100
        reduce (bool, optional): Deprecated (see :attr:`reduction`). By default, the
            losses are averaged or summed over observations for each minibatch depending
            on :attr:`size_average`. When :attr:`reduce` is ``False``, returns a loss per
            batch element instead and ignores :attr:`size_average`. Default: ``True``
        reduction (string, optional): Specifies the reduction to apply to the output:
            ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied,
            ``'mean'``: the sum of the output will be divided by the number of
            elements in the output, ``'sum'``: the output will be summed. Note: :attr:`size_average`
            and :attr:`reduce` are in the process of being deprecated, and in the meantime,
            specifying either of those two args will override :attr:`reduction`. Default: ``'mean'``

    Example::

        >>> # input is of size N x C = 3 x 5
        >>> input = torch.randn(3, 5, requires_grad=True)
        >>> # each element in target has to have 0 <= value < C
        >>> target = torch.tensor([1, 0, 4])
        >>> output = F.nll_loss(F.log_softmax(input, dim=1), target)
        >>> output.backward()
    """
    if size_average is not None or reduce is not None:
        # Either legacy flag overrides `reduction`.
        reduction = _Reduction.legacy_get_string(size_average, reduce)
    dim = input.dim()
    if dim < 2:
        raise ValueError('Expected 2 or more dimensions (got {})'.format(dim))

    input_batch = input.size(0)
    target_batch = target.size(0)
    if input_batch != target_batch:
        raise ValueError('Expected input batch_size ({}) to match target batch_size ({}).'
                         .format(input_batch, target_batch))
    if dim == 2:
        ret = torch._C._nn.nll_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
    elif dim == 4:
        ret = torch._C._nn.nll_loss2d(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
    else:
        # dim == 3 or dim > 4: reshape to (n, c, 1, -1) and reuse the 2D kernel.
        n = input.size(0)
        c = input.size(1)
        out_size = (n,) + input.size()[2:]
        if target.size()[1:] != input.size()[2:]:
            raise ValueError('Expected target size {}, got {}'.format(
                out_size, target.size()))
        input = input.contiguous().view(n, c, 1, -1)
        target = target.contiguous().view(n, 1, -1)
        reduction_enum = _Reduction.get_enum(reduction)
        ret = torch._C._nn.nll_loss2d(
            input, target, weight, reduction_enum, ignore_index)
        if reduction == 'none':
            # Unreduced output is reshaped to (n, d_1, ..., d_K).
            ret = ret.view(out_size)
    return ret
def poisson_nll_loss(input, target, log_input=True, full=False, size_average=None, eps=1e-8,
                     reduce=None, reduction='mean'):
    r"""Poisson negative log likelihood loss.

    See :class:`~torch.nn.PoissonNLLLoss` for details.

    Args:
        input: expectation of underlying Poisson distribution.
        target: random sample :math:`target \sim \text{Poisson}(input)`.
        log_input: if ``True`` the loss is computed as
            :math:`\exp(\text{input}) - \text{target} * \text{input}`, if ``False`` then loss is
            :math:`\text{input} - \text{target} * \log(\text{input}+\text{eps})`. Default: ``True``
        full: whether to compute full loss, i. e. to add the Stirling
            approximation term
            :math:`\text{target} * \log(\text{target}) - \text{target} + 0.5 * \log(2 * \pi * \text{target})`.
            Default: ``False``
        size_average (bool, optional): Deprecated (see :attr:`reduction`). By default,
            the losses are averaged over each loss element in the batch. Note that for
            some losses, there multiple elements per sample. If the field :attr:`size_average`
            is set to ``False``, the losses are instead summed for each minibatch. Ignored
            when reduce is ``False``. Default: ``True``
        eps (float, optional): Small value to avoid evaluation of :math:`\log(0)` when
            :attr:`log_input`=``False``. Default: 1e-8
        reduce (bool, optional): Deprecated (see :attr:`reduction`). By default, the
            losses are averaged or summed over observations for each minibatch depending
            on :attr:`size_average`. When :attr:`reduce` is ``False``, returns a loss per
            batch element instead and ignores :attr:`size_average`. Default: ``True``
        reduction (string, optional): Specifies the reduction to apply to the output:
            ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied,
            ``'mean'``: the sum of the output will be divided by the number of
            elements in the output, ``'sum'``: the output will be summed. Note: :attr:`size_average`
            and :attr:`reduce` are in the process of being deprecated, and in the meantime,
            specifying either of those two args will override :attr:`reduction`. Default: ``'mean'``

    """
    if size_average is not None or reduce is not None:
        # Either legacy flag overrides `reduction`.
        reduction = _Reduction.legacy_get_string(size_average, reduce)
    if log_input:
        loss = torch.exp(input) - target * input
    else:
        loss = input - target * torch.log(input + eps)
    if full:
        # The Stirling term is only added where target > 1.
        mask = target > 1
        stirling = target * torch.log(target) - target + 0.5 * torch.log(2 * math.pi * target)
        loss[mask] += stirling[mask]
    if reduction == 'none':
        ret = loss
    elif reduction == 'mean':
        ret = torch.mean(loss)
    elif reduction == 'sum':
        ret = torch.sum(loss)
    else:
        ret = input
        raise ValueError(reduction + " is not valid")
    return ret
def kl_div(input, target, size_average=None, reduce=None, reduction='mean'):
    r"""The `Kullback-Leibler divergence`_ Loss.

    See :class:`~torch.nn.KLDivLoss` for details.

    Args:
        input: Tensor of arbitrary shape
        target: Tensor of the same shape as input
        size_average (bool, optional): Deprecated (see :attr:`reduction`). By default,
            the losses are averaged over each loss element in the batch. Note that for
            some losses, there multiple elements per sample. If the field :attr:`size_average`
            is set to ``False``, the losses are instead summed for each minibatch. Ignored
            when reduce is ``False``. Default: ``True``
        reduce (bool, optional): Deprecated (see :attr:`reduction`). By default, the
            losses are averaged or summed over observations for each minibatch depending
            on :attr:`size_average`. When :attr:`reduce` is ``False``, returns a loss per
            batch element instead and ignores :attr:`size_average`. Default: ``True``
        reduction (string, optional): Specifies the reduction to apply to the output:
            ``'none'`` | ``'batchmean'`` | ``'sum'`` | ``'mean'``.
            ``'none'``: no reduction will be applied
            ``'batchmean'``: the sum of the output will be divided by the batchsize
            ``'sum'``: the output will be summed
            ``'mean'``: the output will be divided by the number of elements in the output
            Default: ``'mean'``

    .. note::
        :attr:`size_average` and :attr:`reduce` are in the process of being deprecated,
        and in the meantime, specifying either of those two args will override :attr:`reduction`.

    .. note::
        :attr:`reduction` = ``'mean'`` doesn't return the true kl divergence value, please use
        :attr:`reduction` = ``'batchmean'`` which aligns with KL math definition.
        In the next major release, ``'mean'`` will be changed to be the same as 'batchmean'.
    """
    if size_average is not None or reduce is not None:
        # Either legacy flag determines the reduction enum directly.
        reduction_enum = _Reduction.legacy_get_enum(size_average, reduce)
    else:
        if reduction == 'mean':
            # Each fragment ends with a space so the sentences stay separated.
            warnings.warn("reduction: 'mean' divides the total loss by both the batch size and the support size. "
                          "'batchmean' divides only by the batch size, and aligns with the KL div math definition. "
                          "'mean' will be changed to behave the same as 'batchmean' in the next major release.")

        # 'batchmean' is computed as a sum here and divided by the batch
        # size after the kernel call below.
        if reduction == 'batchmean':
            reduction_enum = _Reduction.get_enum('sum')
        else:
            reduction_enum = _Reduction.get_enum(reduction)

    reduced = torch.kl_div(input, target, reduction_enum)

    # A 0-dim input has no batch dimension to divide by.
    if reduction == 'batchmean' and input.dim() != 0:
        reduced = reduced / input.size()[0]

    return reduced
def cross_entropy(input, target, weight=None, size_average=None, ignore_index=-100,
                  reduce=None, reduction='mean'):
    r"""This criterion combines `log_softmax` and `nll_loss` in a single
    function.

    See :class:`~torch.nn.CrossEntropyLoss` for details.

    Args:
        input (Tensor) : :math:`(N, C)` where `C = number of classes` or :math:`(N, C, H, W)`
            in case of 2D Loss, or :math:`(N, C, d_1, d_2, ..., d_K)` where :math:`K \geq 1`
            in the case of K-dimensional loss.
        target (Tensor) : :math:`(N)` where each value is :math:`0 \leq \text{targets}[i] \leq C-1`,
            or :math:`(N, d_1, d_2, ..., d_K)` where :math:`K \geq 1` for
            K-dimensional loss.
        weight (Tensor, optional): a manual rescaling weight given to each
            class. If given, has to be a Tensor of size `C`
        size_average (bool, optional): Deprecated (see :attr:`reduction`). By default,
            the losses are averaged over each loss element in the batch. Note that for
            some losses, there are multiple elements per sample. If the field :attr:`size_average`
            is set to ``False``, the losses are instead summed for each minibatch. Ignored
            when reduce is ``False``. Default: ``True``
        ignore_index (int, optional): Specifies a target value that is ignored
            and does not contribute to the input gradient. When :attr:`size_average` is
            ``True``, the loss is averaged over non-ignored targets. Default: -100
        reduce (bool, optional): Deprecated (see :attr:`reduction`). By default, the
            losses are averaged or summed over observations for each minibatch depending
            on :attr:`size_average`. When :attr:`reduce` is ``False``, returns a loss per
            batch element instead and ignores :attr:`size_average`. Default: ``True``
        reduction (string, optional): Specifies the reduction to apply to the output:
            ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied,
            ``'mean'``: the sum of the output will be divided by the number of
            elements in the output, ``'sum'``: the output will be summed. Note: :attr:`size_average`
            and :attr:`reduce` are in the process of being deprecated, and in the meantime,
            specifying either of those two args will override :attr:`reduction`. Default: ``'mean'``

    Examples::

        >>> input = torch.randn(3, 5, requires_grad=True)
        >>> target = torch.randint(5, (3,), dtype=torch.int64)
        >>> loss = F.cross_entropy(input, target)
        >>> loss.backward()
    """
    if size_average is not None or reduce is not None:
        # Legacy flags override ``reduction``.
        reduction = _Reduction.legacy_get_string(size_average, reduce)
    # The legacy flags are already folded into ``reduction``, so pass None
    # for them; log_softmax is taken over dim 1 (the class dimension).
    return nll_loss(log_softmax(input, 1), target, weight, None, ignore_index, None, reduction)
def binary_cross_entropy(input, target, weight=None, size_average=None,
                         reduce=None, reduction='mean'):
    r"""Function that measures the Binary Cross Entropy
    between the target and the output.

    See :class:`~torch.nn.BCELoss` for details.

    Args:
        input: Tensor of arbitrary shape
        target: Tensor of the same shape as input
        weight (Tensor, optional): a manual rescaling weight
                if provided it's repeated to match input tensor shape
        size_average (bool, optional): Deprecated (see :attr:`reduction`). By default,
            the losses are averaged over each loss element in the batch. Note that for
            some losses, there are multiple elements per sample. If the field :attr:`size_average`
            is set to ``False``, the losses are instead summed for each minibatch. Ignored
            when reduce is ``False``. Default: ``True``
        reduce (bool, optional): Deprecated (see :attr:`reduction`). By default, the
            losses are averaged or summed over observations for each minibatch depending
            on :attr:`size_average`. When :attr:`reduce` is ``False``, returns a loss per
            batch element instead and ignores :attr:`size_average`. Default: ``True``
        reduction (string, optional): Specifies the reduction to apply to the output:
            ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied,
            ``'mean'``: the sum of the output will be divided by the number of
            elements in the output, ``'sum'``: the output will be summed. Note: :attr:`size_average`
            and :attr:`reduce` are in the process of being deprecated, and in the meantime,
            specifying either of those two args will override :attr:`reduction`. Default: ``'mean'``

    Raises:
        ValueError: if ``input`` and ``target`` have a different number of elements.

    Examples::

        >>> input = torch.randn((3, 2), requires_grad=True)
        >>> target = torch.rand((3, 2), requires_grad=False)
        >>> loss = F.binary_cross_entropy(F.sigmoid(input), target)
        >>> loss.backward()
    """
    if size_average is not None or reduce is not None:
        # Legacy flags override ``reduction``.
        reduction_enum = _Reduction.legacy_get_enum(size_average, reduce)
    else:
        reduction_enum = _Reduction.get_enum(reduction)
    # A shape mismatch with equal element count is only deprecated (warn);
    # a differing element count is a hard error.
    if not (target.size() == input.size()):
        warnings.warn("Using a target size ({}) that is different to the input size ({}) is deprecated. "
                      "Please ensure they have the same size.".format(target.size(), input.size()))
    if input.numel() != target.numel():
        raise ValueError("Target and input must have the same number of elements. target nelement ({}) "
                         "!= input nelement ({})".format(target.numel(), input.numel()))

    if weight is not None:
        # Broadcast the weight to the target's shape before the native call.
        new_size = _infer_size(target.size(), weight.size())
        weight = weight.expand(new_size)

    return torch._C._nn.binary_cross_entropy(
        input, target, weight, reduction_enum)
def binary_cross_entropy_with_logits(input, target, weight=None, size_average=None,
                                     reduce=None, reduction='mean', pos_weight=None):
    r"""Function that measures Binary Cross Entropy between target and output
    logits.

    See :class:`~torch.nn.BCEWithLogitsLoss` for details.

    Args:
        input: Tensor of arbitrary shape
        target: Tensor of the same shape as input
        weight (Tensor, optional): a manual rescaling weight
            if provided it's repeated to match input tensor shape
        size_average (bool, optional): Deprecated (see :attr:`reduction`). By default,
            the losses are averaged over each loss element in the batch. Note that for
            some losses, there are multiple elements per sample. If the field :attr:`size_average`
            is set to ``False``, the losses are instead summed for each minibatch. Ignored
            when reduce is ``False``. Default: ``True``
        reduce (bool, optional): Deprecated (see :attr:`reduction`). By default, the
            losses are averaged or summed over observations for each minibatch depending
            on :attr:`size_average`. When :attr:`reduce` is ``False``, returns a loss per
            batch element instead and ignores :attr:`size_average`. Default: ``True``
        reduction (string, optional): Specifies the reduction to apply to the output:
            ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied,
            ``'mean'``: the sum of the output will be divided by the number of
            elements in the output, ``'sum'``: the output will be summed. Note: :attr:`size_average`
            and :attr:`reduce` are in the process of being deprecated, and in the meantime,
            specifying either of those two args will override :attr:`reduction`. Default: ``'mean'``
        pos_weight (Tensor, optional): a weight of positive examples.
                Must be a vector with length equal to the number of classes.

    Raises:
        ValueError: if ``target`` and ``input`` sizes differ.

    Examples::

         >>> input = torch.randn(3, requires_grad=True)
         >>> target = torch.empty(3).random_(2)
         >>> loss = F.binary_cross_entropy_with_logits(input, target)
         >>> loss.backward()
    """
    if size_average is not None or reduce is not None:
        # Legacy flags override ``reduction``.
        reduction_enum = _Reduction.legacy_get_enum(size_average, reduce)
    else:
        reduction_enum = _Reduction.get_enum(reduction)

    if not (target.size() == input.size()):
        raise ValueError("Target size ({}) must be the same as input size ({})".format(target.size(), input.size()))

    return torch.binary_cross_entropy_with_logits(input, target, weight, pos_weight, reduction_enum)
def _pointwise_loss(lambd, lambd_optimized, input, target, reduction='mean'):
    """Apply an element-wise loss and reduce it.

    Args:
        lambd: callable ``(input, target) -> Tensor`` used when ``target``
            requires grad.
        lambd_optimized: callable ``(input, target, reduction_enum) -> Tensor``
            used otherwise; it receives the broadcast tensors and performs the
            reduction itself.
        input: input tensor.
        target: target tensor.
        reduction: ``'none'`` | ``'mean'`` | ``'sum'``.

    Returns:
        The unreduced, mean-reduced or sum-reduced loss.
    """
    if target.requires_grad:
        d = lambd(input, target)
        if reduction == 'none':
            return d
        else:
            # Any value other than 'mean' falls through to a sum here.
            return torch.mean(d) if reduction == 'mean' else torch.sum(d)
    else:
        expanded_input, expanded_target = torch.broadcast_tensors(input, target)
        return lambd_optimized(expanded_input, expanded_target, _Reduction.get_enum(reduction))
def _smooth_l1_loss(input, target):
    """Return the unreduced smooth-L1 loss.

    With ``t = |input - target|`` the element-wise value is ``0.5 * t ** 2``
    where ``t < 1`` and ``t - 0.5`` elsewhere.
    """
    t = torch.abs(input - target)
    return torch.where(t < 1, 0.5 * t ** 2, t - 0.5)
def smooth_l1_loss(input, target, size_average=None, reduce=None, reduction='mean'):
    r"""Function that uses a squared term if the absolute
    element-wise error falls below 1 and an L1 term otherwise.

    See :class:`~torch.nn.SmoothL1Loss` for details.

    The deprecated :attr:`size_average` / :attr:`reduce` flags, when given,
    override :attr:`reduction` (``'none'`` | ``'mean'`` | ``'sum'``).
    """
    if size_average is not None or reduce is not None:
        reduction = _Reduction.legacy_get_string(size_average, reduce)
    if target.requires_grad:
        # Compute the loss in Python when the target requires grad.
        ret = _smooth_l1_loss(input, target)
        if reduction != 'none':
            ret = torch.mean(ret) if reduction == 'mean' else torch.sum(ret)
    else:
        # Broadcast explicitly, then let the native op compute and reduce.
        expanded_input, expanded_target = torch.broadcast_tensors(input, target)
        ret = torch._C._nn.smooth_l1_loss(expanded_input, expanded_target, _Reduction.get_enum(reduction))
    return ret
def l1_loss(input, target, size_average=None, reduce=None, reduction='mean'):
    r"""l1_loss(input, target, size_average=None, reduce=None, reduction='mean') -> Tensor

    Function that takes the mean element-wise absolute value difference.

    See :class:`~torch.nn.L1Loss` for details.

    The deprecated :attr:`size_average` / :attr:`reduce` flags, when given,
    override :attr:`reduction` (``'none'`` | ``'mean'`` | ``'sum'``).
    """
    if size_average is not None or reduce is not None:
        reduction = _Reduction.legacy_get_string(size_average, reduce)
    if target.requires_grad:
        # Compute the loss in Python when the target requires grad.
        ret = torch.abs(input - target)
        if reduction != 'none':
            ret = torch.mean(ret) if reduction == 'mean' else torch.sum(ret)
    else:
        # Broadcast explicitly, then let the native op compute and reduce.
        expanded_input, expanded_target = torch.broadcast_tensors(input, target)
        ret = torch._C._nn.l1_loss(expanded_input, expanded_target, _Reduction.get_enum(reduction))
    return ret
def mse_loss(input, target, size_average=None, reduce=None, reduction='mean'):
    r"""mse_loss(input, target, size_average=None, reduce=None, reduction='mean') -> Tensor

    Measures the element-wise mean squared error.

    See :class:`~torch.nn.MSELoss` for details.

    The deprecated :attr:`size_average` / :attr:`reduce` flags, when given,
    override :attr:`reduction` (``'none'`` | ``'mean'`` | ``'sum'``).
    """
    if size_average is not None or reduce is not None:
        reduction = _Reduction.legacy_get_string(size_average, reduce)
    if target.requires_grad:
        # Compute the loss in Python when the target requires grad.
        ret = (input - target) ** 2
        if reduction != 'none':
            ret = torch.mean(ret) if reduction == 'mean' else torch.sum(ret)
    else:
        # Broadcast explicitly, then let the native op compute and reduce.
        expanded_input, expanded_target = torch.broadcast_tensors(input, target)
        ret = torch._C._nn.mse_loss(expanded_input, expanded_target, _Reduction.get_enum(reduction))
    return ret
def margin_ranking_loss(input1, input2, target, margin=0, size_average=None,
                        reduce=None, reduction='mean'):
    r"""margin_ranking_loss(input1, input2, target, margin=0, size_average=None, reduce=None, reduction='mean') -> Tensor

    See :class:`~torch.nn.MarginRankingLoss` for details.

    The deprecated :attr:`size_average` / :attr:`reduce` flags, when given,
    override :attr:`reduction`.

    Raises:
        RuntimeError: if any of ``input1``, ``input2`` or ``target`` is 0-dim.
    """  # noqa
    if size_average is not None or reduce is not None:
        reduction_enum = _Reduction.legacy_get_enum(size_average, reduce)
    else:
        reduction_enum = _Reduction.get_enum(reduction)
    if input1.dim() == 0 or input2.dim() == 0 or target.dim() == 0:
        raise RuntimeError(("margin_ranking_loss does not support scalars, got sizes: "
                            "input1: {}, input2: {}, target: {} ".format(input1.size(), input2.size(), target.size())))
    return torch.margin_ranking_loss(input1, input2, target, margin, reduction_enum)
def hinge_embedding_loss(input, target, margin=1.0, size_average=None,
                         reduce=None, reduction='mean'):
    r"""hinge_embedding_loss(input, target, margin=1.0, size_average=None, reduce=None, reduction='mean') -> Tensor

    See :class:`~torch.nn.HingeEmbeddingLoss` for details.

    The deprecated :attr:`size_average` / :attr:`reduce` flags, when given,
    override :attr:`reduction`.
    """  # noqa
    if size_average is not None or reduce is not None:
        reduction_enum = _Reduction.legacy_get_enum(size_average, reduce)
    else:
        reduction_enum = _Reduction.get_enum(reduction)
    return torch.hinge_embedding_loss(input, target, margin, reduction_enum)
def multilabel_margin_loss(input, target, size_average=None, reduce=None, reduction='mean'):
    r"""multilabel_margin_loss(input, target, size_average=None, reduce=None, reduction='mean') -> Tensor

    See :class:`~torch.nn.MultiLabelMarginLoss` for details.

    The deprecated :attr:`size_average` / :attr:`reduce` flags, when given,
    override :attr:`reduction`.
    """
    if size_average is not None or reduce is not None:
        reduction_enum = _Reduction.legacy_get_enum(size_average, reduce)
    else:
        reduction_enum = _Reduction.get_enum(reduction)
    return torch._C._nn.multilabel_margin_loss(input, target, reduction_enum)
def soft_margin_loss(input, target, size_average=None, reduce=None, reduction='mean'):
    r"""soft_margin_loss(input, target, size_average=None, reduce=None, reduction='mean') -> Tensor

    See :class:`~torch.nn.SoftMarginLoss` for details.

    The deprecated :attr:`size_average` / :attr:`reduce` flags, when given,
    override :attr:`reduction`.
    """
    if size_average is not None or reduce is not None:
        reduction_enum = _Reduction.legacy_get_enum(size_average, reduce)
    else:
        reduction_enum = _Reduction.get_enum(reduction)
    return torch._C._nn.soft_margin_loss(input, target, reduction_enum)
def multilabel_soft_margin_loss(input, target, weight=None, size_average=None,
                                reduce=None, reduction='mean'):
    r"""multilabel_soft_margin_loss(input, target, weight=None, size_average=None, reduce=None, reduction='mean') -> Tensor

    See :class:`~torch.nn.MultiLabelSoftMarginLoss` for details.

    The deprecated :attr:`size_average` / :attr:`reduce` flags, when given,
    override :attr:`reduction`.

    Raises:
        ValueError: if ``reduction`` is not ``'none'``, ``'mean'`` or ``'sum'``.
    """  # noqa
    if size_average is not None or reduce is not None:
        reduction = _Reduction.legacy_get_string(size_average, reduce)

    loss = -(target * logsigmoid(input) + (1 - target) * logsigmoid(-input))

    if weight is not None:
        loss = loss * weight

    # Average over dim 1, leaving one loss value per sample.
    loss = loss.sum(dim=1) / input.size(1)

    if reduction == 'none':
        ret = loss
    elif reduction == 'mean':
        ret = loss.mean()
    elif reduction == 'sum':
        ret = loss.sum()
    else:
        raise ValueError(reduction + " is not valid")
    return ret
def cosine_embedding_loss(input1, input2, target, margin=0, size_average=None,
                          reduce=None, reduction='mean'):
    r"""cosine_embedding_loss(input1, input2, target, margin=0, size_average=None, reduce=None, reduction='mean') -> Tensor

    See :class:`~torch.nn.CosineEmbeddingLoss` for details.

    The deprecated :attr:`size_average` / :attr:`reduce` flags, when given,
    override :attr:`reduction`.
    """  # noqa
    if size_average is not None or reduce is not None:
        reduction_enum = _Reduction.legacy_get_enum(size_average, reduce)
    else:
        reduction_enum = _Reduction.get_enum(reduction)
    return torch.cosine_embedding_loss(input1, input2, target, margin, reduction_enum)
def multi_margin_loss(input, target, p=1, margin=1., weight=None, size_average=None,
                      reduce=None, reduction='mean'):
    r"""multi_margin_loss(input, target, p=1, margin=1, weight=None, size_average=None,
                          reduce=None, reduction='mean') -> Tensor

    See :class:`~torch.nn.MultiMarginLoss` for details.

    The deprecated :attr:`size_average` / :attr:`reduce` flags, when given,
    override :attr:`reduction`.

    Raises:
        ValueError: if ``p`` is neither 1 nor 2, or if ``weight`` is given and
            is not one-dimensional.
    """
    if size_average is not None or reduce is not None:
        reduction_enum = _Reduction.legacy_get_enum(size_average, reduce)
    else:
        reduction_enum = _Reduction.get_enum(reduction)
    if p != 1 and p != 2:
        raise ValueError('only p == 1 and p == 2 supported')
    if weight is not None:
        if weight.dim() != 1:
            raise ValueError('weight must be one-dimensional')

    return torch._C._nn.multi_margin_loss(input, target, p, margin, weight, reduction_enum)
pixel_shuffle = _add_docstr(torch.pixel_shuffle, r"""
Rearranges elements in a tensor of shape :math:`(*, C \times r^2, H, W)` to a
tensor of shape :math:`(*, C, H \times r, W \times r)`.

See :class:`~torch.nn.PixelShuffle` for details.

Args:
    input (Tensor): the input tensor
    upscale_factor (int): factor to increase spatial resolution by

Examples::

    >>> input = torch.randn(1, 9, 4, 4)
    >>> output = torch.nn.functional.pixel_shuffle(input, 3)
    >>> print(output.size())
    torch.Size([1, 1, 12, 12])
""")


def upsample(input, size=None, scale_factor=None, mode='nearest', align_corners=None):
    r"""Upsamples the input to either the given :attr:`size` or the given
    :attr:`scale_factor`

    .. warning::
        This function is deprecated in favor of :func:`torch.nn.functional.interpolate`.
        This is equivalent with ``nn.functional.interpolate(...)``.

    .. include:: cuda_deterministic_backward.rst

    The algorithm used for upsampling is determined by :attr:`mode`.

    Currently temporal, spatial and volumetric upsampling are supported, i.e.
    expected inputs are 3-D, 4-D or 5-D in shape.

    The input dimensions are interpreted in the form:
    `mini-batch x channels x [optional depth] x [optional height] x width`.

    The modes available for upsampling are: `nearest`, `linear` (3D-only),
    `bilinear`, `bicubic` (4D-only), `trilinear` (5D-only)

    Args:
        input (Tensor): the input tensor
        size (int or Tuple[int] or Tuple[int, int] or Tuple[int, int, int]):
            output spatial size.
        scale_factor (float or Tuple[float]): multiplier for spatial size. Has to be an integer.
        mode (string): algorithm used for upsampling:
            ``'nearest'`` | ``'linear'`` | ``'bilinear'`` | ``'bicubic'`` |
            ``'trilinear'``. Default: ``'nearest'``
        align_corners (bool, optional): Geometrically, we consider the pixels of the
            input and output as squares rather than points.
            If set to ``True``, the input and output tensors are aligned by the
            center points of their corner pixels. If set to ``False``, the input and
            output tensors are aligned by the corner points of their corner
            pixels, and the interpolation uses edge value padding for out-of-boundary values.
            This only has effect when :attr:`mode` is ``'linear'``,
            ``'bilinear'``, ``'bicubic'`` or ``'trilinear'``.
            Default: ``False``

    .. warning::
        With ``align_corners = True``, the linearly interpolating modes
        (`linear`, `bilinear`, and `trilinear`) don't proportionally align the
        output and input pixels, and thus the output values can depend on the
        input size. This was the default behavior for these modes up to version
        0.3.1. Since then, the default behavior is ``align_corners = False``.
        See :class:`~torch.nn.Upsample` for concrete examples on how this
        affects the outputs.

    """
    warnings.warn("nn.functional.upsample is deprecated. Use nn.functional.interpolate instead.")
    return interpolate(input, size, scale_factor, mode, align_corners)
def interpolate(input, size=None, scale_factor=None, mode='nearest', align_corners=None):
    r"""Down/up samples the input to either the given :attr:`size` or the given
    :attr:`scale_factor`

    The algorithm used for interpolation is determined by :attr:`mode`.

    Currently temporal, spatial and volumetric sampling are supported, i.e.
    expected inputs are 3-D, 4-D or 5-D in shape.

    The input dimensions are interpreted in the form:
    `mini-batch x channels x [optional depth] x [optional height] x width`.

    The modes available for resizing are: `nearest`, `linear` (3D-only),
    `bilinear`, `bicubic` (4D-only), `trilinear` (5D-only), `area`

    Args:
        input (Tensor): the input tensor
        size (int or Tuple[int] or Tuple[int, int] or Tuple[int, int, int]):
            output spatial size.
        scale_factor (float or Tuple[float]): multiplier for spatial size. Has to match input size if it is a tuple.
        mode (str): algorithm used for upsampling:
            ``'nearest'`` | ``'linear'`` | ``'bilinear'`` | ``'bicubic'`` |
            ``'trilinear'`` | ``'area'``. Default: ``'nearest'``
        align_corners (bool, optional): Geometrically, we consider the pixels of the
            input and output as squares rather than points.
            If set to ``True``, the input and output tensors are aligned by the
            center points of their corner pixels. If set to ``False``, the input and
            output tensors are aligned by the corner points of their corner
            pixels, and the interpolation uses edge value padding for out-of-boundary values.
            This only has effect when :attr:`mode` is ``'linear'``,
            ``'bilinear'``, ``'bicubic'``, or ``'trilinear'``.
            Default: ``False``

    Raises:
        ValueError: if neither or both of ``size`` and ``scale_factor`` are
            given, if a tuple ``scale_factor`` has the wrong length, or if
            ``align_corners`` is set for ``'nearest'`` / ``'area'``.
        NotImplementedError: if the input dimensionality and ``mode`` are not
            a supported combination.

    .. warning::
        With ``align_corners = True``, the linearly interpolating modes
        (`linear`, `bilinear`, and `trilinear`) don't proportionally align the
        output and input pixels, and thus the output values can depend on the
        input size. This was the default behavior for these modes up to version
        0.3.1. Since then, the default behavior is ``align_corners = False``.
        See :class:`~torch.nn.Upsample` for concrete examples on how this
        affects the outputs.

    .. include:: cuda_deterministic_backward.rst
    """
    from .modules.utils import _ntuple

    def _check_size_scale_factor(dim):
        # Exactly one of size / scale_factor must be supplied.
        if size is None and scale_factor is None:
            raise ValueError('either size or scale_factor should be defined')
        if size is not None and scale_factor is not None:
            raise ValueError('only one of size or scale_factor should be defined')
        if scale_factor is not None and isinstance(scale_factor, tuple)\
                and len(scale_factor) != dim:
            raise ValueError('scale_factor shape must match input shape. '
                             'Input is {}D, scale_factor size is {}'.format(dim, len(scale_factor)))

    def _output_size(dim):
        # Resolve the target spatial size for ``dim`` spatial dimensions.
        _check_size_scale_factor(dim)
        if size is not None:
            return size
        scale_factors = _ntuple(dim)(scale_factor)
        # Spatial dims start at index 2 (after batch and channels); the
        # scaled size is floored and cast to int explicitly.
        return [int(math.floor(input.size(i + 2) * scale_factors[i])) for i in range(dim)]

    if mode in ('nearest', 'area'):
        if align_corners is not None:
            raise ValueError("align_corners option can only be set with the "
                             "interpolating modes: linear | bilinear | bicubic | trilinear")
    else:
        if align_corners is None:
            warnings.warn("Default upsampling behavior when mode={} is changed "
                          "to align_corners=False since 0.4.0. Please specify "
                          "align_corners=True if the old behavior is desired. "
                          "See the documentation of nn.Upsample for details.".format(mode))
            align_corners = False

    if input.dim() == 3 and mode == 'nearest':
        return torch._C._nn.upsample_nearest1d(input, _output_size(1))
    elif input.dim() == 4 and mode == 'nearest':
        return torch._C._nn.upsample_nearest2d(input, _output_size(2))
    elif input.dim() == 5 and mode == 'nearest':
        return torch._C._nn.upsample_nearest3d(input, _output_size(3))
    elif input.dim() == 3 and mode == 'area':
        return adaptive_avg_pool1d(input, _output_size(1))
    elif input.dim() == 4 and mode == 'area':
        return adaptive_avg_pool2d(input, _output_size(2))
    elif input.dim() == 5 and mode == 'area':
        return adaptive_avg_pool3d(input, _output_size(3))
    elif input.dim() == 3 and mode == 'linear':
        return torch._C._nn.upsample_linear1d(input, _output_size(1), align_corners)
    elif input.dim() == 3 and mode == 'bilinear':
        raise NotImplementedError("Got 3D input, but bilinear mode needs 4D input")
    elif input.dim() == 3 and mode == 'trilinear':
        raise NotImplementedError("Got 3D input, but trilinear mode needs 5D input")
    elif input.dim() == 4 and mode == 'linear':
        raise NotImplementedError("Got 4D input, but linear mode needs 3D input")
    elif input.dim() == 4 and mode == 'bilinear':
        return torch._C._nn.upsample_bilinear2d(input, _output_size(2), align_corners)
    elif input.dim() == 4 and mode == 'trilinear':
        raise NotImplementedError("Got 4D input, but trilinear mode needs 5D input")
    elif input.dim() == 5 and mode == 'linear':
        raise NotImplementedError("Got 5D input, but linear mode needs 3D input")
    elif input.dim() == 5 and mode == 'bilinear':
        raise NotImplementedError("Got 5D input, but bilinear mode needs 4D input")
    elif input.dim() == 5 and mode == 'trilinear':
        return torch._C._nn.upsample_trilinear3d(input, _output_size(3), align_corners)
    elif input.dim() == 4 and mode == 'bicubic':
        return torch._C._nn.upsample_bicubic2d(input, _output_size(2), align_corners)
    else:
        raise NotImplementedError("Input Error: Only 3D, 4D and 5D input Tensors supported"
                                  " (got {}D) for the modes: nearest | linear | bilinear | bicubic | trilinear"
                                  " (got {})".format(input.dim(), mode))
def upsample_nearest(input, size=None, scale_factor=None):
    r"""Upsamples the input, using nearest neighbours' pixel values.

    .. warning::
        This function is deprecated in favor of :func:`torch.nn.functional.interpolate`.
        This is equivalent with ``nn.functional.interpolate(..., mode='nearest')``.

    Currently spatial and volumetric upsampling are supported (i.e. expected
    inputs are 4 or 5 dimensional).

    Args:
        input (Tensor): input
        size (int or Tuple[int, int] or Tuple[int, int, int]): output spatial
            size.
        scale_factor (int): multiplier for spatial size. Has to be an integer.

    .. include:: cuda_deterministic_backward.rst
    """
    # DeprecationWarning is ignored by default
    warnings.warn("nn.functional.upsample_nearest is deprecated. Use nn.functional.interpolate instead.")
    return interpolate(input, size, scale_factor, mode='nearest')
def upsample_bilinear(input, size=None, scale_factor=None):
    r"""Upsamples the input, using bilinear upsampling.

    .. warning::
        This function is deprecated in favor of :func:`torch.nn.functional.interpolate`.
        This is equivalent with
        ``nn.functional.interpolate(..., mode='bilinear', align_corners=True)``.

    Expected inputs are spatial (4 dimensional). Use `upsample_trilinear` for
    volumetric (5 dimensional) inputs.

    Args:
        input (Tensor): input
        size (int or Tuple[int, int]): output spatial size.
        scale_factor (int or Tuple[int, int]): multiplier for spatial size

    .. include:: cuda_deterministic_backward.rst
    """
    # DeprecationWarning is ignored by default
    warnings.warn("nn.functional.upsample_bilinear is deprecated. Use nn.functional.interpolate instead.")
    return interpolate(input, size, scale_factor, mode='bilinear', align_corners=True)
# Integer codes handed to ``torch.grid_sampler`` for each interpolation mode.
# NOTE(review): the dict bodies were missing from this view; values follow the
# mode/padding -> enum mapping used by ``grid_sample`` - confirm against the
# native GridSampler enums.
GRID_SAMPLE_INTERPOLATION_MODES = {
    'bilinear': 0,
    'nearest': 1,
}

# Integer codes handed to ``torch.grid_sampler`` for each padding mode.
GRID_SAMPLE_PADDING_MODES = {
    'zeros': 0,
    'border': 1,
    'reflection': 2,
}
def grid_sample(input, grid, mode='bilinear', padding_mode='zeros'):
    r"""Given an :attr:`input` and a flow-field :attr:`grid`, computes the
    ``output`` using :attr:`input` values and pixel locations from :attr:`grid`.

    Currently, only spatial (4-D) and volumetric (5-D) :attr:`input` are
    supported.

    In the spatial (4-D) case, for :attr:`input` with shape
    :math:`(N, C, H_\text{in}, W_\text{in})` and :attr:`grid` with shape
    :math:`(N, H_\text{out}, W_\text{out}, 2)`, the output will have shape
    :math:`(N, C, H_\text{out}, W_\text{out})`.

    For each output location ``output[n, :, h, w]``, the size-2 vector
    ``grid[n, h, w]`` specifies :attr:`input` pixel locations ``x`` and ``y``,
    which are used to interpolate the output value ``output[n, :, h, w]``.
    In the case of 5D inputs, ``grid[n, d, h, w]`` specifies the
    ``x``, ``y``, ``z`` pixel locations for interpolating
    ``output[n, :, d, h, w]``. :attr:`mode` argument specifies ``nearest`` or
    ``bilinear`` interpolation method to sample the input pixels.

    :attr:`grid` should have most values in the range of ``[-1, 1]``. This is
    because the pixel locations are normalized by the :attr:`input` spatial
    dimensions. For example, values ``x = -1, y = -1`` is the left-top pixel of
    :attr:`input`, and values  ``x = 1, y = 1`` is the right-bottom pixel of
    :attr:`input`.

    If :attr:`grid` has values outside the range of ``[-1, 1]``, those locations
    are handled as defined by :attr:`padding_mode`. Options are

        * ``padding_mode="zeros"``: use ``0`` for out-of-bound values,
        * ``padding_mode="border"``: use border values for out-of-bound values,
        * ``padding_mode="reflection"``: use values at locations reflected by
          the border for out-of-bound values. For location far away from the
          border, it will keep being reflected until becoming in bound, e.g.,
          (normalized) pixel location ``x = -3.5`` reflects by ``-1`` and
          becomes ``x' = 1.5``, then reflects by border ``1`` and becomes
          ``x'' = -0.5``.

    .. Note:: This function is often used in building Spatial Transformer Networks.
    .. include:: cuda_deterministic_backward.rst

    Args:
        input (Tensor): input of shape :math:`(N, C, H_\text{in}, W_\text{in})` (4-D case)
                        or :math:`(N, C, D_\text{in}, H_\text{in}, W_\text{in})` (5-D case)
        grid (Tensor): flow-field of shape :math:`(N, H_\text{out}, W_\text{out}, 2)` (4-D case)
                       or :math:`(N, D_\text{out}, H_\text{out}, W_\text{out}, 3)` (5-D case)
        mode (str): interpolation mode to calculate output values
            ``'bilinear'`` | ``'nearest'``. Default: ``'bilinear'``
        padding_mode (str): padding mode for outside grid values
            ``'zeros'`` | ``'border'`` | ``'reflection'``. Default: ``'zeros'``

    Returns:
        output (Tensor): output Tensor

    Raises:
        ValueError: if ``mode`` or ``padding_mode`` is not one of the
            supported strings.
    """
    if mode != 'bilinear' and mode != 'nearest':
        raise ValueError("nn.functional.grid_sample(): expected mode to be "
                         "'bilinear' or 'nearest', but got: '{}'".format(mode))
    if padding_mode != 'zeros' and padding_mode != 'border' and padding_mode != 'reflection':
        raise ValueError("nn.functional.grid_sample(): expected padding_mode "
                         "to be 'zeros', 'border', or 'reflection', "
                         "but got: '{}'".format(padding_mode))

    # Translate the validated strings into the integer codes the native op
    # expects (see GRID_SAMPLE_INTERPOLATION_MODES / GRID_SAMPLE_PADDING_MODES).
    if mode == 'bilinear':
        mode_enum = 0
    else:
        mode_enum = 1

    if padding_mode == 'zeros':
        padding_mode_enum = 0
    elif padding_mode == 'border':
        padding_mode_enum = 1
    else:
        padding_mode_enum = 2

    return torch.grid_sampler(input, grid, mode_enum, padding_mode_enum)
def affine_grid(theta, size):
    r"""Generates a 2d flow field, given a batch of affine matrices :attr:`theta`.
    Generally used in conjunction with :func:`grid_sample` to
    implement Spatial Transformer Networks.

    Args:
        theta (Tensor): input batch of affine matrices (:math:`N \times 2 \times 3`)
        size (torch.Size): the target output image size (:math:`N \times C \times H \times W`).
            Example: torch.Size((32, 3, 24, 24))

    Returns:
        output (Tensor): output Tensor of size (:math:`N \times H \times W \times 2`)
    """
    return vision.affine_grid_generator(theta, size)
def pad(input, pad, mode='constant', value=0):
    r"""Pads tensor.

    Padding size:
        The padding size by which to pad some dimensions of :attr:`input`
        are described starting from the last dimension and moving forward.
        :math:`\left\lfloor\frac{\text{len(pad)}}{2}\right\rfloor` dimensions
        of ``input`` will be padded.
        For example, to pad only the last dimension of the input tensor, then
        :attr:`pad` has the form
        :math:`(\text{padding\_left}, \text{padding\_right})`;
        to pad the last 2 dimensions of the input tensor, then use
        :math:`(\text{padding\_left}, \text{padding\_right},`
        :math:`\text{padding\_top}, \text{padding\_bottom})`;
        to pad the last 3 dimensions, use
        :math:`(\text{padding\_left}, \text{padding\_right},`
        :math:`\text{padding\_top}, \text{padding\_bottom}`
        :math:`\text{padding\_front}, \text{padding\_back})`.

    Padding mode:
        See :class:`torch.nn.ConstantPad2d`, :class:`torch.nn.ReflectionPad2d`, and
        :class:`torch.nn.ReplicationPad2d` for concrete examples on how each of the
        padding modes works. Constant padding is implemented for arbitrary dimensions.
        Replicate padding is implemented for padding the last 3 dimensions of 5D input
        tensor, or the last 2 dimensions of 4D input tensor, or the last dimension of
        3D input tensor. Reflect padding is only implemented for padding the last 2
        dimensions of 4D input tensor, or the last dimension of 3D input tensor.
        Circular padding of 3D, 4D and 5D inputs is delegated to
        :func:`pad_circular`.

    .. include:: cuda_deterministic_backward.rst

    Args:
        input (Tensor): N-dimensional tensor
        pad (tuple): m-elements tuple, where
            :math:`\frac{m}{2} \leq` input dimensions and :math:`m` is even.
        mode: ``'constant'``, ``'reflect'``, ``'replicate'`` or ``'circular'``.
            Default: ``'constant'``
        value: fill value for ``'constant'`` padding. Default: ``0``

    Raises:
        AssertionError: if ``len(pad)`` is odd or exceeds twice the number of
            input dimensions, if a non-zero :attr:`value` is given with a
            non-constant mode, or if ``len(pad)`` does not match the input
            rank for a non-constant mode.
        NotImplementedError: if the mode / input rank combination is not
            supported.

    Examples::

        >>> t4d = torch.empty(3, 3, 4, 2)
        >>> p1d = (1, 1) # pad last dim by 1 on each side
        >>> out = F.pad(t4d, p1d, "constant", 0)  # effectively zero padding
        >>> print(out.data.size())
        torch.Size([3, 3, 4, 4])
        >>> p2d = (1, 1, 2, 2) # pad last dim by (1, 1) and 2nd to last by (2, 2)
        >>> out = F.pad(t4d, p2d, "constant", 0)
        >>> print(out.data.size())
        torch.Size([3, 3, 8, 4])
        >>> t4d = torch.empty(3, 3, 4, 2)
        >>> p3d = (0, 1, 2, 1, 3, 3) # pad by (0, 1), (2, 1), and (3, 3)
        >>> out = F.pad(t4d, p3d, "constant", 0)
        >>> print(out.data.size())
        torch.Size([3, 9, 7, 3])

    """
    assert len(pad) % 2 == 0, 'Padding length must be divisible by 2'
    assert len(pad) // 2 <= input.dim(), 'Padding length too large'
    if mode == 'constant':
        ret = _VF.constant_pad_nd(input, pad, value)
    else:
        # Only constant padding has a fill value; anything else must leave it at 0.
        assert value == 0, 'Padding mode "{}" doesn\'t take in value argument'.format(mode)
        if input.dim() == 3:
            assert len(pad) == 2, '3D tensors expect 2 values for padding'
            if mode == 'reflect':
                ret = torch._C._nn.reflection_pad1d(input, pad)
            elif mode == 'replicate':
                ret = torch._C._nn.replication_pad1d(input, pad)
            elif mode == 'circular':
                ret = pad_circular(input, pad)
            else:
                raise NotImplementedError(
                    'Padding mode "{}" is not supported for 3D input'.format(mode))

        elif input.dim() == 4:
            assert len(pad) == 4, '4D tensors expect 4 values for padding'
            if mode == 'reflect':
                ret = torch._C._nn.reflection_pad2d(input, pad)
            elif mode == 'replicate':
                ret = torch._C._nn.replication_pad2d(input, pad)
            elif mode == 'circular':
                ret = pad_circular(input, pad)
            else:
                raise NotImplementedError(
                    'Padding mode "{}" is not supported for 4D input'.format(mode))

        elif input.dim() == 5:
            assert len(pad) == 6, '5D tensors expect 6 values for padding'
            if mode == 'reflect':
                # There is no 3D reflection kernel to dispatch to.
                raise NotImplementedError(
                    'Padding mode "reflect" is not supported for 5D input')
            elif mode == 'replicate':
                ret = torch._C._nn.replication_pad3d(input, pad)
            elif mode == 'circular':
                ret = pad_circular(input, pad)
            else:
                raise NotImplementedError(
                    'Padding mode "{}" is not supported for 5D input'.format(mode))
        else:
            raise NotImplementedError(
                "Only 3D, 4D, 5D padding with non-constant padding are supported for now")
    return ret
def pairwise_distance(x1, x2, p=2., eps=1e-6, keepdim=False):
    r"""Functional form of :class:`torch.nn.PairwiseDistance`.

    All arguments are forwarded unchanged, in order, to
    :func:`torch.pairwise_distance`.

    See :class:`torch.nn.PairwiseDistance` for details
    """
    distance = torch.pairwise_distance(x1, x2, p, eps, keepdim)
    return distance
pdist = _add_docstr(torch.pdist, r"""
pdist(input, p=2) -> Tensor

Computes the p-norm distance between every pair of row vectors in the input.
This is identical to the upper triangular portion, excluding the diagonal, of
`torch.norm(input[:, None] - input, dim=2, p=p)`. This function will be faster
if the rows are contiguous.

If input has shape :math:`N \times M` then the output will have shape
:math:`\frac{1}{2} N (N - 1)`.

This function is equivalent to `scipy.spatial.distance.pdist(input,
'minkowski', p=p)` if :math:`p \in (0, \infty)`. When :math:`p = 0` it is
equivalent to `scipy.spatial.distance.pdist(input, 'hamming') * M`.
When :math:`p = \infty`, the closest scipy function is
`scipy.spatial.distance.pdist(xn, lambda x, y: np.abs(x - y).max())`.

Args:
    input: input tensor of shape :math:`N \times M`.
    p: p value for the p-norm distance to calculate between each vector pair
        :math:`\in [0, \infty]`.
""")


cosine_similarity = _add_docstr(torch.cosine_similarity, r"""
cosine_similarity(x1, x2, dim=1, eps=1e-8) -> Tensor

Returns cosine similarity between x1 and x2, computed along dim.

.. math ::
    \text{similarity} = \dfrac{x_1 \cdot x_2}{\max(\Vert x_1 \Vert _2 \cdot \Vert x_2 \Vert _2, \epsilon)}

Args:
    x1 (Tensor): First input.
    x2 (Tensor): Second input (of size matching x1).
    dim (int, optional): Dimension of vectors. Default: 1
    eps (float, optional): Small value to avoid division by zero.
        Default: 1e-8

Shape:
    - Input: :math:`(\ast_1, D, \ast_2)` where D is at position `dim`.
    - Output: :math:`(\ast_1, \ast_2)`, i.e. the input shape with dimension `dim` removed.

Example::

    >>> input1 = torch.randn(100, 128)
    >>> input2 = torch.randn(100, 128)
    >>> output = F.cosine_similarity(input1, input2)
    >>> print(output)
""")


one_hot = _add_docstr(torch._C._nn.one_hot, r"""
one_hot(tensor, num_classes=-1) -> LongTensor

Takes LongTensor with index values of shape ``(*)`` and returns a tensor
of shape ``(*, num_classes)`` that have zeros everywhere except where the
index of last dimension matches the corresponding value of the input tensor,
in which case it will be 1.

See also `One-hot on Wikipedia`_ .

.. _One-hot on Wikipedia:
    https://en.wikipedia.org/wiki/One-hot

Arguments:
    tensor (LongTensor): class values of any shape.
    num_classes (int):  Total number of classes. If set to -1, the number
        of classes will be inferred as one greater than the largest class
        value in the input tensor.

Returns:
    LongTensor that has one more dimension with 1 values at the
    index of last dimension indicated by the input, and 0 everywhere
    else.

Examples:
    >>> F.one_hot(torch.arange(0, 5) % 3)
    tensor([[1, 0, 0],
            [0, 1, 0],
            [0, 0, 1],
            [1, 0, 0],
            [0, 1, 0]])
    >>> F.one_hot(torch.arange(0, 5) % 3, num_classes=5)
    tensor([[1, 0, 0, 0, 0],
            [0, 1, 0, 0, 0],
            [0, 0, 1, 0, 0],
            [1, 0, 0, 0, 0],
            [0, 1, 0, 0, 0]])
    >>> F.one_hot(torch.arange(0, 6).view(3,2) % 3)
    tensor([[[1, 0, 0],
             [0, 1, 0]],
            [[0, 0, 1],
             [1, 0, 0]],
            [[0, 1, 0],
             [0, 0, 1]]])
""")


def triplet_margin_loss(anchor, positive, negative, margin=1.0, p=2, eps=1e-6, swap=False, size_average=None,
                        reduce=None, reduction="mean"):
    r"""Functional form of :class:`~torch.nn.TripletMarginLoss`.

    The reduction passed to :func:`torch.triplet_margin_loss` is derived from
    the legacy :attr:`size_average` / :attr:`reduce` flags whenever either of
    them is given; otherwise it is derived from :attr:`reduction`.

    See :class:`~torch.nn.TripletMarginLoss` for details
    """
    if size_average is not None or reduce is not None:
        # Legacy flags take precedence over ``reduction`` when supplied.
        reduction_enum = _Reduction.legacy_get_enum(size_average, reduce)
    else:
        reduction_enum = _Reduction.get_enum(reduction)
    return torch.triplet_margin_loss(anchor, positive, negative, margin, p, eps,
                                     swap, reduction_enum)
def normalize(input, p=2, dim=1, eps=1e-12, out=None):
    r"""Performs :math:`L_p` normalization of inputs over specified dimension.

    For a tensor :attr:`input` of sizes :math:`(n_0, ..., n_{dim}, ..., n_k)`, each
    :math:`n_{dim}` -element vector :math:`v` along dimension :attr:`dim` is transformed as

    .. math::
        v = \frac{v}{\max(\lVert v \rVert_p, \epsilon)}.

    With the default arguments it uses the Euclidean norm over vectors along dimension :math:`1` for normalization.

    Args:
        input: input tensor of any shape
        p (float): the exponent value in the norm formulation. Default: 2
        dim (int): the dimension to reduce. Default: 1
        eps (float): small value to avoid division by zero. Default: 1e-12
        out (Tensor, optional): the output tensor. If :attr:`out` is used, this
                                operation won't be differentiable.

    Returns:
        Tensor: the normalized tensor; when :attr:`out` is given the result of
        ``torch.div(..., out=out)`` is returned.
    """
    # The clamped norm is the same on both paths, so compute it once.
    # ``True`` keeps the reduced dimension so the norm can be expanded back.
    denom = input.norm(p, dim, True).clamp_min(eps).expand_as(input)
    if out is None:
        ret = input / denom
    else:
        ret = torch.div(input, denom, out=out)
    return ret
def assert_int_or_pair(arg, arg_name, message):
    """Assert that ``arg`` is an ``int`` or a sequence of length 2.

    Args:
        arg: value to validate.
        arg_name: name substituted into ``message`` when the check fails.
        message: format string with one ``{}`` placeholder for ``arg_name``.

    Raises:
        AssertionError: with ``message.format(arg_name)`` when ``arg`` is
            neither an ``int`` nor of length 2.
    """
    is_int = isinstance(arg, int)
    # ``len`` is only consulted for non-int arguments.
    assert is_int or len(arg) == 2, message.format(arg_name)
def unfold(input, kernel_size, dilation=1, padding=0, stride=1):
    r"""Extracts sliding local blocks from a batched input tensor.

    .. warning::
        Currently, only 4-D input tensors (batched image-like tensors) are
        supported.

    .. warning::

        More than one element of the unfolded tensor may refer to a single
        memory location. As a result, in-place operations (especially ones that
        are vectorized) may result in incorrect behavior. If you need to write
        to the tensor, please clone it first.


    See :class:`torch.nn.Unfold` for details
    """
    ndim = input.dim()
    if ndim != 4:
        raise NotImplementedError(
            "Input Error: Only 4D input Tensors are supported (got {}D)".format(ndim))

    msg = '{} must be int or 2-tuple for 4D input'
    # Validate in the same order as the signature so the first offending
    # argument is the one reported.
    checks = (
        (kernel_size, 'kernel_size'),
        (dilation, 'dilation'),
        (padding, 'padding'),
        (stride, 'stride'),
    )
    for candidate, label in checks:
        assert_int_or_pair(candidate, label, msg)

    kernel = _pair(kernel_size)
    dil = _pair(dilation)
    padd = _pair(padding)
    step = _pair(stride)
    return torch._C._nn.thnn_im2col(input, kernel, dil, padd, step)
def fold(input, output_size, kernel_size, dilation=1, padding=0, stride=1):
    r"""Combines an array of sliding local blocks into a large containing
    tensor.

    .. warning::
        Currently, only 4-D output tensors (batched image-like tensors) are
        supported.

    See :class:`torch.nn.Fold` for details
    """
    ndim = input.dim()
    if ndim != 3:
        raise NotImplementedError(
            "Input Error: Only 3D input Tensors are supported (got {}D)".format(ndim))

    msg = '{} must be int or 2-tuple for 3D input'
    # Validate in the same order as the signature so the first offending
    # argument is the one reported.
    checks = (
        (output_size, 'output_size'),
        (kernel_size, 'kernel_size'),
        (dilation, 'dilation'),
        (padding, 'padding'),
        (stride, 'stride'),
    )
    for candidate, label in checks:
        assert_int_or_pair(candidate, label, msg)

    out_hw = _pair(output_size)
    kernel = _pair(kernel_size)
    dil = _pair(dilation)
    padd = _pair(padding)
    step = _pair(stride)
    return torch._C._nn.thnn_col2im(input, out_hw, kernel, dil, padd, step)
def _circular_pad_dim(tensor, dim, front, back):
    """Wrap ``tensor`` around along ``dim``.

    The last ``front`` entries along ``dim`` are prepended and the first
    ``back`` entries are appended. Each wrap is clamped to the size of the
    dimension; non-positive amounts add nothing.
    """
    size = tensor.size(dim)
    pieces = []
    if front > 0:
        wrap = min(front, size)
        pieces.append(tensor.narrow(dim, size - wrap, wrap))
    pieces.append(tensor)
    if back > 0:
        wrap = min(back, size)
        pieces.append(tensor.narrow(dim, 0, wrap))
    return torch.cat(pieces, dim=dim)


def pad_circular(input, padding):
    r"""Pads the trailing dimensions of ``input`` circularly (wrap-around).

    ``padding`` is read in pairs from its end: dimension 2 is padded with
    ``padding[-2]`` entries in front and ``padding[-1]`` entries behind;
    if ``len(padding) > 2`` dimension 3 uses ``padding[-4]`` / ``padding[-3]``;
    if ``len(padding) > 4`` dimension 4 uses ``padding[-6]`` / ``padding[-5]``.

    Both wraps are taken from the unpadded tensor, so a leading pad is
    applied even when the trailing pad of the same dimension is 0 (slicing
    the already-extended tensor with ``-(back + front):-back`` yields an
    empty slice when ``back == 0``).

    Arguments
        :param input: tensor of shape :math:`(N, C_{\text{in}}, H, [W, D])`
        :param padding: (tuple): tuple holding a (front, back) pair of pad
            sizes for every padded dimension, with the pair for dimension 2
            stored last
    Returns
        :return: tensor in which every padded dimension has grown by the sum
            of its front and back pad sizes
    """
    input = _circular_pad_dim(input, 2, padding[-2], padding[-1])

    if len(padding) > 2:
        input = _circular_pad_dim(input, 3, padding[-4], padding[-3])

    if len(padding) > 4:
        input = _circular_pad_dim(input, 4, padding[-6], padding[-5])

    return input
def annotate(the_type, the_value)
Module caffe2.python.helpers.dropout.
def boolean_dispatch(arg_name, arg_index, default, if_true, if_false, module_name, func_name)