import warnings

from .module import Module
from .container import Sequential
from .activation import LogSoftmax
from .. import functional as F
from .. import _reduction as _Reduction
from ..._jit_internal import weak_module, weak_script_method
class _Loss(Module):
    def __init__(self, size_average=None, reduce=None, reduction='mean'):
        super(_Loss, self).__init__()
        if size_average is not None or reduce is not None:
            self.reduction = _Reduction.legacy_get_string(size_average, reduce)
        else:
            self.reduction = reduction
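
# The deprecated ``size_average``/``reduce`` flags are folded into the ``reduction`` string by
# ``_Reduction.legacy_get_string``. A minimal sketch of that mapping, assuming the documented
# semantics of the two flags (the real helper also emits deprecation warnings):
#
#     def _legacy_get_string_sketch(size_average, reduce):
#         size_average = True if size_average is None else size_average
#         reduce = True if reduce is None else reduce
#         if not reduce:
#             return 'none'                       # per-element losses; size_average is ignored
#         return 'mean' if size_average else 'sum'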
class _WeightedLoss(_Loss):
    def __init__(self, weight=None, size_average=None, reduce=None, reduction='mean'):
        super(_WeightedLoss, self).__init__(size_average, reduce, reduction)
        self.register_buffer('weight', weight)
@weak_module
class L1Loss(_Loss):
    r"""Creates a criterion that measures the mean absolute error (MAE) between each element in
    the input :math:`x` and target :math:`y`.

    The unreduced (i.e. with :attr:`reduction` set to ``'none'``) loss can be described as:

    .. math::
        \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad
        l_n = \left| x_n - y_n \right|,

    where :math:`N` is the batch size. If :attr:`reduction` is not ``'none'``
    (default ``'mean'``), then:

    .. math::
        \ell(x, y) =
        \begin{cases}
            \operatorname{mean}(L), & \text{if reduction} = \text{'mean';}\\
            \operatorname{sum}(L),  & \text{if reduction} = \text{'sum'.}
        \end{cases}

    :math:`x` and :math:`y` are tensors of arbitrary shapes with a total of :math:`n` elements each.

    The sum operation still operates over all the elements, and divides by :math:`n`.
    The division by :math:`n` can be avoided if one sets ``reduction = 'sum'``.

    Args:
        size_average (bool, optional): Deprecated (see :attr:`reduction`). By default, the losses are
            averaged over each loss element in the batch. Note that for some losses, there are multiple
            elements per sample. If the field :attr:`size_average` is set to ``False``, the losses are
            instead summed for each minibatch. Ignored when :attr:`reduce` is ``False``. Default: ``True``
        reduce (bool, optional): Deprecated (see :attr:`reduction`). By default, the losses are averaged
            or summed over observations for each minibatch depending on :attr:`size_average`. When
            :attr:`reduce` is ``False``, returns a loss per batch element instead and ignores
            :attr:`size_average`. Default: ``True``
        reduction (string, optional): Specifies the reduction to apply to the output:
            ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied, ``'mean'``: the
            sum of the output will be divided by the number of elements in the output, ``'sum'``: the
            output will be summed. Note: :attr:`size_average` and :attr:`reduce` are in the process of
            being deprecated, and in the meantime, specifying either of those two args will override
            :attr:`reduction`. Default: ``'mean'``

    Shape:
        - Input: :math:`(N, *)` where :math:`*` means any number of additional dimensions
        - Target: :math:`(N, *)`, same shape as the input
        - Output: scalar. If :attr:`reduction` is ``'none'``, then :math:`(N, *)`, same shape as the input

    Examples::

        >>> loss = nn.L1Loss()
        >>> input = torch.randn(3, 5, requires_grad=True)
        >>> target = torch.randn(3, 5)
        >>> output = loss(input, target)
        >>> output.backward()
    """
    __constants__ = ['reduction']
    def __init__(self, size_average=None, reduce=None, reduction='mean'):
        super(L1Loss, self).__init__(size_average, reduce, reduction)

    @weak_script_method
    def forward(self, input, target):
        return F.l1_loss(input, target, reduction=self.reduction)
@weak_module
class NLLLoss(_WeightedLoss):
    r"""The negative log likelihood loss. It is useful to train a classification
    problem with `C` classes.

    If provided, the optional argument :attr:`weight` should be a 1D Tensor assigning
    weight to each of the classes. This is particularly useful when you have an
    unbalanced training set.

    The `input` given through a forward call is expected to contain log-probabilities of each class.
    `input` has to be a Tensor of size either :math:`(minibatch, C)` or
    :math:`(minibatch, C, d_1, d_2, ..., d_K)` with :math:`K \geq 1` for the `K`-dimensional case
    (described later).

    Obtaining log-probabilities in a neural network is easily achieved by adding a `LogSoftmax` layer
    in the last layer of your network. You may use `CrossEntropyLoss` instead, if you prefer not to
    add an extra layer.

    The `target` that this loss expects should be a class index in the range :math:`[0, C-1]` where
    `C = number of classes`; if `ignore_index` is specified, this loss also accepts this class index
    (this index may not necessarily be in the class range).

    The unreduced (i.e. with :attr:`reduction` set to ``'none'``) loss can be described as:

    .. math::
        \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad
        l_n = - w_{y_n} x_{n,y_n}, \quad
        w_{c} = \text{weight}[c] \cdot \mathbb{1}\{c \not= \text{ignore\_index}\},

    where :math:`N` is the batch size. If :attr:`reduction` is not ``'none'``
    (default ``'mean'``), then

    .. math::
        \ell(x, y) = \begin{cases}
            \sum_{n=1}^N \frac{1}{\sum_{n=1}^N w_{y_n}} l_n, &
            \text{if reduction} = \text{'mean';}\\
            \sum_{n=1}^N l_n, &
            \text{if reduction} = \text{'sum'.}
        \end{cases}

    Can also be used for higher dimension inputs, such as 2D images, by providing an input of size
    :math:`(minibatch, C, d_1, d_2, ..., d_K)` with :math:`K \geq 1`, where :math:`K` is the number of
    dimensions, and a target of appropriate shape (see below). In the case of images, it computes NLL
    loss per-pixel.

    Args:
        weight (Tensor, optional): a manual rescaling weight given to each class. If given, it has to
            be a Tensor of size `C`. Otherwise, it is treated as if having all ones.
        size_average (bool, optional): Deprecated (see :attr:`reduction`). By default, the losses are
            averaged over each loss element in the batch. Note that for some losses, there are multiple
            elements per sample. If the field :attr:`size_average` is set to ``False``, the losses are
            instead summed for each minibatch. Ignored when :attr:`reduce` is ``False``. Default: ``True``
        ignore_index (int, optional): Specifies a target value that is ignored and does not contribute
            to the input gradient. When :attr:`size_average` is ``True``, the loss is averaged over
            non-ignored targets.
        reduce (bool, optional): Deprecated (see :attr:`reduction`). By default, the losses are averaged
            or summed over observations for each minibatch depending on :attr:`size_average`. When
            :attr:`reduce` is ``False``, returns a loss per batch element instead and ignores
            :attr:`size_average`. Default: ``True``
        reduction (string, optional): Specifies the reduction to apply to the output:
            ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied, ``'mean'``: the
            sum of the output will be divided by the number of elements in the output, ``'sum'``: the
            output will be summed. Note: :attr:`size_average` and :attr:`reduce` are in the process of
            being deprecated, and in the meantime, specifying either of those two args will override
            :attr:`reduction`. Default: ``'mean'``

    Shape:
        - Input: :math:`(N, C)` where `C = number of classes`, or
          :math:`(N, C, d_1, d_2, ..., d_K)` with :math:`K \geq 1` in the case of `K`-dimensional loss.
        - Target: :math:`(N)` where each value is :math:`0 \leq \text{targets}[i] \leq C-1`, or
          :math:`(N, d_1, d_2, ..., d_K)` with :math:`K \geq 1` in the case of K-dimensional loss.
        - Output: scalar.
          If :attr:`reduction` is ``'none'``, then the same size as the target: :math:`(N)`, or
          :math:`(N, d_1, d_2, ..., d_K)` with :math:`K \geq 1` in the case of K-dimensional loss.

    Examples::

        >>> m = nn.LogSoftmax(dim=1)
        >>> loss = nn.NLLLoss()
        >>> # input is of size N x C = 3 x 5
        >>> input = torch.randn(3, 5, requires_grad=True)
        >>> # each element in target has to have 0 <= value < C
        >>> target = torch.tensor([1, 0, 4])
        >>> output = loss(m(input), target)
        >>> output.backward()
        >>>
        >>> # 2D loss example (used, for example, with image inputs)
        >>> N, C = 5, 4
        >>> loss = nn.NLLLoss()
        >>> # input is of size N x C x height x width
        >>> data = torch.randn(N, 16, 10, 10)
        >>> conv = nn.Conv2d(16, C, (3, 3))
        >>> m = nn.LogSoftmax(dim=1)
        >>> # each element in target has to have 0 <= value < C
        >>> target = torch.empty(N, 8, 8, dtype=torch.long).random_(0, C)
        >>> output = loss(m(conv(data)), target)
        >>> output.backward()
    """
    __constants__ = ['ignore_index', 'weight', 'reduction']
    def __init__(self, weight=None, size_average=None, ignore_index=-100,
                 reduce=None, reduction='mean'):
        super(NLLLoss, self).__init__(weight, size_average, reduce, reduction)
        self.ignore_index = ignore_index

    @weak_script_method
    def forward(self, input, target):
        return F.nll_loss(input, target, weight=self.weight, ignore_index=self.ignore_index,
                          reduction=self.reduction)
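
# For a 2D ``input`` of log-probabilities, the unreduced loss above is just a (negated, optionally
# weighted) gather along the class dimension. A minimal sketch, ignoring ``ignore_index``:
#
#     >>> log_probs = torch.log_softmax(torch.randn(3, 5), dim=1)
#     >>> target = torch.tensor([1, 0, 4])
#     >>> per_sample = -log_probs[torch.arange(3), target]
#     >>> torch.allclose(per_sample, F.nll_loss(log_probs, target, reduction='none'))
#     True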
class NLLLoss2d(NLLLoss):
    def __init__(self, weight=None, size_average=None, ignore_index=-100,
                 reduce=None, reduction='mean'):
        warnings.warn("NLLLoss2d has been deprecated. "
                      "Please use NLLLoss instead as a drop-in replacement and see "
                      "https://pytorch.org/docs/master/nn.html#torch.nn.NLLLoss for more details.")
        super(NLLLoss2d, self).__init__(weight, size_average, ignore_index, reduce, reduction)
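
# NLLLoss2d only exists for backwards compatibility; NLLLoss already handles K-dimensional
# (e.g. image-shaped) inputs. A drop-in swap:
#
#     >>> # old: criterion = nn.NLLLoss2d(ignore_index=255)
#     >>> criterion = nn.NLLLoss(ignore_index=255)   # same behaviour on (N, C, H, W) inputs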
@weak_module
class PoissonNLLLoss(_Loss):
    r"""Negative log likelihood loss with Poisson distribution of target.

    The loss can be described as:

    .. math::
        \text{target} \sim \mathrm{Poisson}(\text{input})

        \text{loss}(\text{input}, \text{target}) = \text{input} - \text{target} * \log(\text{input})
        + \log(\text{target!})

    The last term can be omitted or approximated with Stirling formula. The approximation is used for
    target values more than 1. For targets less or equal to 1 zeros are added to the loss.

    Args:
        log_input (bool, optional): if ``True`` the loss is computed as
            :math:`\exp(\text{input}) - \text{target}*\text{input}`, if ``False`` the loss is
            :math:`\text{input} - \text{target}*\log(\text{input}+\text{eps})`.
        full (bool, optional): whether to compute full loss, i.e. to add the Stirling
            approximation term

            .. math::
                \text{target}*\log(\text{target}) - \text{target} + 0.5 * \log(2\pi\text{target}).
        size_average (bool, optional): Deprecated (see :attr:`reduction`). By default, the losses are
            averaged over each loss element in the batch. Note that for some losses, there are multiple
            elements per sample. If the field :attr:`size_average` is set to ``False``, the losses are
            instead summed for each minibatch. Ignored when :attr:`reduce` is ``False``. Default: ``True``
        eps (float, optional): Small value to avoid evaluation of :math:`\log(0)` when
            :attr:`log_input = False`. Default: 1e-8
        reduce (bool, optional): Deprecated (see :attr:`reduction`). By default, the losses are averaged
            or summed over observations for each minibatch depending on :attr:`size_average`. When
            :attr:`reduce` is ``False``, returns a loss per batch element instead and ignores
            :attr:`size_average`. Default: ``True``
        reduction (string, optional): Specifies the reduction to apply to the output:
            ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied, ``'mean'``: the
            sum of the output will be divided by the number of elements in the output, ``'sum'``: the
            output will be summed. Note: :attr:`size_average` and :attr:`reduce` are in the process of
            being deprecated, and in the meantime, specifying either of those two args will override
            :attr:`reduction`. Default: ``'mean'``

    Examples::

        >>> loss = nn.PoissonNLLLoss()
        >>> log_input = torch.randn(5, 2, requires_grad=True)
        >>> target = torch.randn(5, 2)
        >>> output = loss(log_input, target)
        >>> output.backward()

    Shape:
        - Input: :math:`(N, *)` where :math:`*` means any number of additional dimensions
        - Target: :math:`(N, *)`, same shape as the input
        - Output: scalar by default. If :attr:`reduction` is ``'none'``, then :math:`(N, *)`,
          the same shape as the input
    """
    __constants__ = ['log_input', 'full', 'eps', 'reduction']
    def __init__(self, log_input=True, full=False, size_average=None,
                 eps=1e-8, reduce=None, reduction='mean'):
        super(PoissonNLLLoss, self).__init__(size_average, reduce, reduction)
        self.log_input = log_input
        self.full = full
        self.eps = eps

    @weak_script_method
    def forward(self, log_input, target):
        return F.poisson_nll_loss(log_input, target, log_input=self.log_input, full=self.full,
                                  eps=self.eps, reduction=self.reduction)
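
# With ``log_input=True`` (the default) the per-element term is exp(input) - target * input, as the
# docstring states. A quick sanity check against the functional form (Stirling term disabled):
#
#     >>> log_input = torch.randn(5, 2)
#     >>> target = torch.rand(5, 2)
#     >>> manual = (torch.exp(log_input) - target * log_input).mean()
#     >>> torch.allclose(manual, F.poisson_nll_loss(log_input, target))
#     True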
@weak_module
class KLDivLoss(_Loss):
    r"""The `Kullback-Leibler divergence`_ Loss

    KL divergence is a useful distance measure for continuous distributions and is often useful when
    performing direct regression over the space of (discretely sampled) continuous output
    distributions.

    As with :class:`~torch.nn.NLLLoss`, the `input` given is expected to contain *log-probabilities*
    and is not restricted to a 2D Tensor. The targets are given as *probabilities* (i.e. without
    taking the logarithm).

    This criterion expects a `target` `Tensor` of the same size as the `input` `Tensor`.

    The unreduced (i.e. with :attr:`reduction` set to ``'none'``) loss can be described as:

    .. math::
        l(x,y) = L = \{ l_1,\dots,l_N \}, \quad
        l_n = y_n \cdot \left( \log y_n - x_n \right)

    where the index :math:`N` spans all dimensions of ``input`` and :math:`L` has the same shape as
    ``input``. If :attr:`reduction` is not ``'none'`` (default ``'mean'``), then:

    .. math::
        \ell(x, y) = \begin{cases}
            \operatorname{mean}(L), & \text{if reduction} = \text{'mean';} \\
            \operatorname{sum}(L),  & \text{if reduction} = \text{'sum'.}
        \end{cases}

    In default :attr:`reduction` mode ``'mean'``, the losses are averaged for each minibatch over
    observations **as well as** over dimensions. ``'batchmean'`` mode gives the correct KL divergence
    where losses are averaged over batch dimension only. ``'mean'`` mode's behavior will be changed to
    the same as ``'batchmean'`` in the next major release.

    .. _Kullback-Leibler divergence: https://en.wikipedia.org/wiki/Kullback-Leibler_divergence

    Args:
        size_average (bool, optional): Deprecated (see :attr:`reduction`). By default, the losses are
            averaged over each loss element in the batch. Note that for some losses, there are multiple
            elements per sample. If the field :attr:`size_average` is set to ``False``, the losses are
            instead summed for each minibatch. Ignored when :attr:`reduce` is ``False``. Default: ``True``
        reduce (bool, optional): Deprecated (see :attr:`reduction`). By default, the losses are averaged
            or summed over observations for each minibatch depending on :attr:`size_average`. When
            :attr:`reduce` is ``False``, returns a loss per batch element instead and ignores
            :attr:`size_average`. Default: ``True``
        reduction (string, optional): Specifies the reduction to apply to the output:
            ``'none'`` | ``'batchmean'`` | ``'sum'`` | ``'mean'``.
            ``'none'``: no reduction will be applied.
            ``'batchmean'``: the sum of the output will be divided by batchsize.
            ``'sum'``: the output will be summed.
            ``'mean'``: the output will be divided by the number of elements in the output.
            Default: ``'mean'``

    .. note::
        :attr:`size_average` and :attr:`reduce` are in the process of being deprecated,
        and in the meantime, specifying either of those two args will override :attr:`reduction`.

    .. note::
        :attr:`reduction` = ``'mean'`` doesn't return the true kl divergence value, please use
        :attr:`reduction` = ``'batchmean'`` which aligns with the KL math definition.
        In the next major release, ``'mean'`` will be changed to be the same as ``'batchmean'``.

    Shape:
        - Input: :math:`(N, *)` where :math:`*` means any number of additional dimensions
        - Target: :math:`(N, *)`, same shape as the input
        - Output: scalar by default. If :attr:`reduction` is ``'none'``, then :math:`(N, *)`,
          the same shape as the input
    """
    __constants__ = ['reduction']
    def __init__(self, size_average=None, reduce=None, reduction='mean'):
        super(KLDivLoss, self).__init__(size_average, reduce, reduction)

    @weak_script_method
    def forward(self, input, target):
        return F.kl_div(input, target, reduction=self.reduction)
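
# ``'batchmean'`` divides the summed loss by the batch size only (the mathematically correct KL
# divergence), while ``'mean'`` divides by the total number of elements. A sketch of the
# relationship, assuming a 2D input of shape (N, D):
#
#     >>> x = torch.log_softmax(torch.randn(4, 6), dim=1)
#     >>> y = torch.softmax(torch.randn(4, 6), dim=1)
#     >>> kl_batchmean = F.kl_div(x, y, reduction='sum') / x.size(0)
#     >>> torch.allclose(kl_batchmean, F.kl_div(x, y, reduction='batchmean'))
#     True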
@weak_module
class MSELoss(_Loss):
    r"""Creates a criterion that measures the mean squared error (squared L2 norm) between
    each element in the input :math:`x` and target :math:`y`.

    The unreduced (i.e. with :attr:`reduction` set to ``'none'``) loss can be described as:

    .. math::
        \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad
        l_n = \left( x_n - y_n \right)^2,

    where :math:`N` is the batch size. If :attr:`reduction` is not ``'none'``
    (default ``'mean'``), then:

    .. math::
        \ell(x, y) =
        \begin{cases}
            \operatorname{mean}(L), & \text{if reduction} = \text{'mean';}\\
            \operatorname{sum}(L),  & \text{if reduction} = \text{'sum'.}
        \end{cases}

    :math:`x` and :math:`y` are tensors of arbitrary shapes with a total of :math:`n` elements each.

    The sum operation still operates over all the elements, and divides by :math:`n`.
    The division by :math:`n` can be avoided if one sets ``reduction = 'sum'``.

    Args:
        size_average (bool, optional): Deprecated (see :attr:`reduction`). By default, the losses are
            averaged over each loss element in the batch. Note that for some losses, there are multiple
            elements per sample. If the field :attr:`size_average` is set to ``False``, the losses are
            instead summed for each minibatch. Ignored when :attr:`reduce` is ``False``. Default: ``True``
        reduce (bool, optional): Deprecated (see :attr:`reduction`). By default, the losses are averaged
            or summed over observations for each minibatch depending on :attr:`size_average`. When
            :attr:`reduce` is ``False``, returns a loss per batch element instead and ignores
            :attr:`size_average`. Default: ``True``
        reduction (string, optional): Specifies the reduction to apply to the output:
            ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied, ``'mean'``: the
            sum of the output will be divided by the number of elements in the output, ``'sum'``: the
            output will be summed. Note: :attr:`size_average` and :attr:`reduce` are in the process of
            being deprecated, and in the meantime, specifying either of those two args will override
            :attr:`reduction`. Default: ``'mean'``

    Shape:
        - Input: :math:`(N, *)` where :math:`*` means any number of additional dimensions
        - Target: :math:`(N, *)`, same shape as the input

    Examples::

        >>> loss = nn.MSELoss()
        >>> input = torch.randn(3, 5, requires_grad=True)
        >>> target = torch.randn(3, 5)
        >>> output = loss(input, target)
        >>> output.backward()
    """
    __constants__ = ['reduction']
    def __init__(self, size_average=None, reduce=None, reduction='mean'):
        super(MSELoss, self).__init__(size_average, reduce, reduction)

    @weak_script_method
    def forward(self, input, target):
        return F.mse_loss(input, target, reduction=self.reduction)
@weak_module
class BCELoss(_WeightedLoss):
    r"""Creates a criterion that measures the Binary Cross Entropy
    between the target and the output:

    The unreduced (i.e. with :attr:`reduction` set to ``'none'``) loss can be described as:

    .. math::
        \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad
        l_n = - w_n \left[ y_n \cdot \log x_n + (1 - y_n) \cdot \log (1 - x_n) \right],

    where :math:`N` is the batch size. If :attr:`reduction` is not ``'none'``
    (default ``'mean'``), then

    .. math::
        \ell(x, y) = \begin{cases}
            \operatorname{mean}(L), & \text{if reduction} = \text{'mean';}\\
            \operatorname{sum}(L),  & \text{if reduction} = \text{'sum'.}
        \end{cases}

    This is used for measuring the error of a reconstruction in for example an auto-encoder.
    Note that the targets :math:`y` should be numbers between 0 and 1.

    Args:
        weight (Tensor, optional): a manual rescaling weight given to the loss of each batch element.
            If given, has to be a Tensor of size `nbatch`.
        size_average (bool, optional): Deprecated (see :attr:`reduction`). By default, the losses are
            averaged over each loss element in the batch. Note that for some losses, there are multiple
            elements per sample. If the field :attr:`size_average` is set to ``False``, the losses are
            instead summed for each minibatch. Ignored when :attr:`reduce` is ``False``. Default: ``True``
        reduce (bool, optional): Deprecated (see :attr:`reduction`). By default, the losses are averaged
            or summed over observations for each minibatch depending on :attr:`size_average`. When
            :attr:`reduce` is ``False``, returns a loss per batch element instead and ignores
            :attr:`size_average`. Default: ``True``
        reduction (string, optional): Specifies the reduction to apply to the output:
            ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied, ``'mean'``: the
            sum of the output will be divided by the number of elements in the output, ``'sum'``: the
            output will be summed. Note: :attr:`size_average` and :attr:`reduce` are in the process of
            being deprecated, and in the meantime, specifying either of those two args will override
            :attr:`reduction`. Default: ``'mean'``

    Shape:
        - Input: :math:`(N, *)` where :math:`*` means any number of additional dimensions
        - Target: :math:`(N, *)`, same shape as the input
        - Output: scalar. If :attr:`reduction` is ``'none'``, then :math:`(N, *)`, same shape as input.

    Examples::

        >>> m = nn.Sigmoid()
        >>> loss = nn.BCELoss()
        >>> input = torch.randn(3, requires_grad=True)
        >>> target = torch.empty(3).random_(2)
        >>> output = loss(m(input), target)
        >>> output.backward()
    """
    __constants__ = ['reduction', 'weight']
    def __init__(self, weight=None, size_average=None, reduce=None, reduction='mean'):
        super(BCELoss, self).__init__(weight, size_average, reduce, reduction)

    @weak_script_method
    def forward(self, input, target):
        return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)
@weak_module
class BCEWithLogitsLoss(_Loss):
    r"""This loss combines a `Sigmoid` layer and the `BCELoss` in one single class. This version is
    more numerically stable than using a plain `Sigmoid` followed by a `BCELoss` as, by combining the
    operations into one layer, we take advantage of the log-sum-exp trick for numerical stability.

    The unreduced (i.e. with :attr:`reduction` set to ``'none'``) loss can be described as:

    .. math::
        \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad
        l_n = - w_n \left[ y_n \cdot \log \sigma(x_n)
        + (1 - y_n) \cdot \log (1 - \sigma(x_n)) \right],

    where :math:`N` is the batch size. If :attr:`reduction` is not ``'none'``
    (default ``'mean'``), then

    .. math::
        \ell(x, y) = \begin{cases}
            \operatorname{mean}(L), & \text{if reduction} = \text{'mean';}\\
            \operatorname{sum}(L),  & \text{if reduction} = \text{'sum'.}
        \end{cases}

    This is used for measuring the error of a reconstruction in for example an auto-encoder.
    Note that the targets `t[i]` should be numbers between 0 and 1.

    It's possible to trade off recall and precision by adding weights to positive examples.
    In this case the loss can be described as:

    .. math::
        \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad
        l_n = - w_n \left[ p_n y_n \cdot \log \sigma(x_n)
        + (1 - y_n) \cdot \log (1 - \sigma(x_n)) \right],

    where :math:`p_n` is the weight of the positive class for sample :math:`n` in the batch.
    :math:`p_n > 1` increases the recall, :math:`p_n < 1` increases the precision.

    For example, if a dataset contains 100 positive and 300 negative examples of a single class,
    then `pos_weight` for the class should be equal to :math:`\frac{300}{100}=3`.
    The loss would act as if the dataset contains :math:`3\times 100=300` positive examples.

    Args:
        weight (Tensor, optional): a manual rescaling weight given to the loss of each batch element.
            If given, has to be a Tensor of size `nbatch`.
        size_average (bool, optional): Deprecated (see :attr:`reduction`). By default, the losses are
            averaged over each loss element in the batch. Note that for some losses, there are multiple
            elements per sample. If the field :attr:`size_average` is set to ``False``, the losses are
            instead summed for each minibatch. Ignored when :attr:`reduce` is ``False``. Default: ``True``
        reduce (bool, optional): Deprecated (see :attr:`reduction`). By default, the losses are averaged
            or summed over observations for each minibatch depending on :attr:`size_average`. When
            :attr:`reduce` is ``False``, returns a loss per batch element instead and ignores
            :attr:`size_average`. Default: ``True``
        reduction (string, optional): Specifies the reduction to apply to the output:
            ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied, ``'mean'``: the
            sum of the output will be divided by the number of elements in the output, ``'sum'``: the
            output will be summed. Note: :attr:`size_average` and :attr:`reduce` are in the process of
            being deprecated, and in the meantime, specifying either of those two args will override
            :attr:`reduction`. Default: ``'mean'``
        pos_weight (Tensor, optional): a weight of positive examples.
            Must be a vector with length equal to the number of classes.

    Shape:
        - Input: :math:`(N, *)` where :math:`*` means any number of additional dimensions
        - Target: :math:`(N, *)`, same shape as the input
        - Output: scalar. If :attr:`reduction` is ``'none'``, then :math:`(N, *)`, same shape as input.

    Examples::

        >>> loss = nn.BCEWithLogitsLoss()
        >>> input = torch.randn(3, requires_grad=True)
        >>> target = torch.empty(3).random_(2)
        >>> output = loss(input, target)
        >>> output.backward()
    """
    __constants__ = ['weight', 'pos_weight', 'reduction']
    def __init__(self, weight=None, size_average=None, reduce=None, reduction='mean', pos_weight=None):
        super(BCEWithLogitsLoss, self).__init__(size_average, reduce, reduction)
        self.register_buffer('weight', weight)
        self.register_buffer('pos_weight', pos_weight)

    @weak_script_method
    def forward(self, input, target):
        return F.binary_cross_entropy_with_logits(input, target,
                                                   self.weight,
                                                   pos_weight=self.pos_weight,
                                                   reduction=self.reduction)
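
# The docstring's rule of thumb for ``pos_weight`` (negatives / positives per class), written out.
# The per-class counts below are made-up illustration values:
#
#     >>> num_positives = torch.tensor([100., 200.])   # hypothetical positive counts per class
#     >>> num_negatives = torch.tensor([300., 100.])   # hypothetical negative counts per class
#     >>> criterion = nn.BCEWithLogitsLoss(pos_weight=num_negatives / num_positives)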
612 r"""Measures the loss given an input tensor :math:`x` and a labels tensor :math:`y` 613 (containing 1 or -1). 614 This is usually used for measuring whether two inputs are similar or 615 dissimilar, e.g. using the L1 pairwise distance as :math:`x`, and is typically 616 used for learning nonlinear embeddings or semi-supervised learning. 618 The loss function for :math:`n`-th sample in the mini-batch is 622 x_n, & \text{if}\; y_n = 1,\\ 623 \max \{0, \Delta - x_n\}, & \text{if}\; y_n = -1, 626 and the total loss functions is 629 \ell(x, y) = \begin{cases} 630 \operatorname{mean}(L), & \text{if reduction} = \text{'mean';}\\ 631 \operatorname{sum}(L), & \text{if reduction} = \text{'sum'.} 634 where :math:`L = \{l_1,\dots,l_N\}^\top`. 637 margin (float, optional): Has a default value of `1`. 638 size_average (bool, optional): Deprecated (see :attr:`reduction`). By default, 639 the losses are averaged over each loss element in the batch. Note that for 640 some losses, there are multiple elements per sample. If the field :attr:`size_average` 641 is set to ``False``, the losses are instead summed for each minibatch. Ignored 642 when reduce is ``False``. Default: ``True`` 643 reduce (bool, optional): Deprecated (see :attr:`reduction`). By default, the 644 losses are averaged or summed over observations for each minibatch depending 645 on :attr:`size_average`. When :attr:`reduce` is ``False``, returns a loss per 646 batch element instead and ignores :attr:`size_average`. Default: ``True`` 647 reduction (string, optional): Specifies the reduction to apply to the output: 648 ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied, 649 ``'mean'``: the sum of the output will be divided by the number of 650 elements in the output, ``'sum'``: the output will be summed. Note: :attr:`size_average` 651 and :attr:`reduce` are in the process of being deprecated, and in the meantime, 652 specifying either of those two args will override :attr:`reduction`. Default: ``'mean'`` 655 - Input: :math:`(*)` where :math:`*` means, any number of dimensions. The sum operation 656 operates over all the elements. 657 - Target: :math:`(*)`, same shape as the input 658 - Output: scalar. If :attr:``reduction`` is ``'none'``, then same shape as the input 660 __constants__ = [
    'margin', 'reduction']

    def __init__(self, margin=1.0, size_average=None, reduce=None, reduction='mean'):
        super(HingeEmbeddingLoss, self).__init__(size_average, reduce, reduction)
        self.margin = margin

    @weak_script_method
    def forward(self, input, target):
        return F.hinge_embedding_loss(input, target, margin=self.margin, reduction=self.reduction)
673 r"""Creates a criterion that optimizes a multi-class multi-classification 674 hinge loss (margin-based loss) between input :math:`x` (a 2D mini-batch `Tensor`) 675 and output :math:`y` (which is a 2D `Tensor` of target class indices). 676 For each sample in the mini-batch: 679 \text{loss}(x, y) = \sum_{ij}\frac{\max(0, 1 - (x[y[j]] - x[i]))}{\text{x.size}(0)} 681 where :math:`x \in \left\{0, \; \cdots , \; \text{x.size}(0) - 1\right\}`, \ 682 :math:`y \in \left\{0, \; \cdots , \; \text{y.size}(0) - 1\right\}`, \ 683 :math:`0 \leq y[j] \leq \text{x.size}(0)-1`, \ 684 and :math:`i \neq y[j]` for all :math:`i` and :math:`j`. 686 :math:`y` and :math:`x` must have the same size. 688 The criterion only considers a contiguous block of non-negative targets that 691 This allows for different samples to have variable amounts of target classes. 694 size_average (bool, optional): Deprecated (see :attr:`reduction`). By default, 695 the losses are averaged over each loss element in the batch. Note that for 696 some losses, there are multiple elements per sample. If the field :attr:`size_average` 697 is set to ``False``, the losses are instead summed for each minibatch. Ignored 698 when reduce is ``False``. Default: ``True`` 699 reduce (bool, optional): Deprecated (see :attr:`reduction`). By default, the 700 losses are averaged or summed over observations for each minibatch depending 701 on :attr:`size_average`. When :attr:`reduce` is ``False``, returns a loss per 702 batch element instead and ignores :attr:`size_average`. Default: ``True`` 703 reduction (string, optional): Specifies the reduction to apply to the output: 704 ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied, 705 ``'mean'``: the sum of the output will be divided by the number of 706 elements in the output, ``'sum'``: the output will be summed. Note: :attr:`size_average` 707 and :attr:`reduce` are in the process of being deprecated, and in the meantime, 708 specifying either of those two args will override :attr:`reduction`. Default: ``'mean'`` 711 - Input: :math:`(C)` or :math:`(N, C)` where `N` is the batch size and `C` 712 is the number of classes. 713 - Target: :math:`(C)` or :math:`(N, C)`, label targets padded by -1 ensuring same shape as the input. 714 - Output: scalar. If :attr:``reduction`` is ``'none'``, then :math:`(N)`. 718 >>> loss = nn.MultiLabelMarginLoss() 719 >>> x = torch.FloatTensor([[0.1, 0.2, 0.4, 0.8]]) 720 >>> # for target y, only consider labels 3 and 0, not after label -1 721 >>> y = torch.LongTensor([[3, 0, -1, 1]]) 723 >>> # 0.25 * ((1-(0.1-0.2)) + (1-(0.1-0.4)) + (1-(0.8-0.2)) + (1-(0.8-0.4))) 727 __constants__ = [
    'reduction']

    def __init__(self, size_average=None, reduce=None, reduction='mean'):
        super(MultiLabelMarginLoss, self).__init__(size_average, reduce, reduction)

    @weak_script_method
    def forward(self, input, target):
        return F.multilabel_margin_loss(input, target, reduction=self.reduction)
739 r"""Creates a criterion that uses a squared term if the absolute 740 element-wise error falls below 1 and an L1 term otherwise. 741 It is less sensitive to outliers than the `MSELoss` and in some cases 742 prevents exploding gradients (e.g. see "Fast R-CNN" paper by Ross Girshick). 743 Also known as the Huber loss: 746 \text{loss}(x, y) = \frac{1}{n} \sum_{i} z_{i} 748 where :math:`z_{i}` is given by: 753 0.5 (x_i - y_i)^2, & \text{if } |x_i - y_i| < 1 \\ 754 |x_i - y_i| - 0.5, & \text{otherwise } 757 :math:`x` and :math:`y` arbitrary shapes with a total of :math:`n` elements each 758 the sum operation still operates over all the elements, and divides by :math:`n`. 760 The division by :math:`n` can be avoided if sets ``reduction = 'sum'``. 763 size_average (bool, optional): Deprecated (see :attr:`reduction`). By default, 764 the losses are averaged over each loss element in the batch. Note that for 765 some losses, there are multiple elements per sample. If the field :attr:`size_average` 766 is set to ``False``, the losses are instead summed for each minibatch. Ignored 767 when reduce is ``False``. Default: ``True`` 768 reduce (bool, optional): Deprecated (see :attr:`reduction`). By default, the 769 losses are averaged or summed over observations for each minibatch depending 770 on :attr:`size_average`. When :attr:`reduce` is ``False``, returns a loss per 771 batch element instead and ignores :attr:`size_average`. Default: ``True`` 772 reduction (string, optional): Specifies the reduction to apply to the output: 773 ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied, 774 ``'mean'``: the sum of the output will be divided by the number of 775 elements in the output, ``'sum'``: the output will be summed. Note: :attr:`size_average` 776 and :attr:`reduce` are in the process of being deprecated, and in the meantime, 777 specifying either of those two args will override :attr:`reduction`. Default: ``'mean'`` 780 - Input: :math:`(N, *)` where :math:`*` means, any number of additional 782 - Target: :math:`(N, *)`, same shape as the input 783 - Output: scalar. If :attr:`reduction` is ``'none'``, then 784 :math:`(N, *)`, same shape as the input 787 __constants__ = [
    'reduction']

    def __init__(self, size_average=None, reduce=None, reduction='mean'):
        super(SmoothL1Loss, self).__init__(size_average, reduce, reduction)

    @weak_script_method
    def forward(self, input, target):
        return F.smooth_l1_loss(input, target, reduction=self.reduction)
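
# A rough element-wise sketch of the piecewise (Huber) definition above, written with basic tensor
# ops; the real kernel is fused inside F.smooth_l1_loss:
#
#     >>> diff = (input - target).abs()
#     >>> elementwise = torch.where(diff < 1, 0.5 * diff ** 2, diff - 0.5)
#     >>> loss_mean = elementwise.mean()   # corresponds to reduction='mean'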
799 r"""Creates a criterion that optimizes a two-class classification 800 logistic loss between input tensor :math:`x` and target tensor :math:`y` 801 (containing 1 or -1). 804 \text{loss}(x, y) = \sum_i \frac{\log(1 + \exp(-y[i]*x[i]))}{\text{x.nelement}()} 807 size_average (bool, optional): Deprecated (see :attr:`reduction`). By default, 808 the losses are averaged over each loss element in the batch. Note that for 809 some losses, there are multiple elements per sample. If the field :attr:`size_average` 810 is set to ``False``, the losses are instead summed for each minibatch. Ignored 811 when reduce is ``False``. Default: ``True`` 812 reduce (bool, optional): Deprecated (see :attr:`reduction`). By default, the 813 losses are averaged or summed over observations for each minibatch depending 814 on :attr:`size_average`. When :attr:`reduce` is ``False``, returns a loss per 815 batch element instead and ignores :attr:`size_average`. Default: ``True`` 816 reduction (string, optional): Specifies the reduction to apply to the output: 817 ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied, 818 ``'mean'``: the sum of the output will be divided by the number of 819 elements in the output, ``'sum'``: the output will be summed. Note: :attr:`size_average` 820 and :attr:`reduce` are in the process of being deprecated, and in the meantime, 821 specifying either of those two args will override :attr:`reduction`. Default: ``'mean'`` 824 - Input: :math:`(*)` where :math:`*` means, any number of additional 826 - Target: :math:`(*)`, same shape as the input 827 - Output: scalar. If :attr:`reduction` is ``'none'``, then same shape as the input 830 __constants__ = [
    'reduction']

    def __init__(self, size_average=None, reduce=None, reduction='mean'):
        super(SoftMarginLoss, self).__init__(size_average, reduce, reduction)

    @weak_script_method
    def forward(self, input, target):
        return F.soft_margin_loss(input, target, reduction=self.reduction)
@weak_module
class CrossEntropyLoss(_WeightedLoss):
    r"""This criterion combines :func:`nn.LogSoftmax` and :func:`nn.NLLLoss` in one single class.

    It is useful when training a classification problem with `C` classes.
    If provided, the optional argument :attr:`weight` should be a 1D `Tensor` assigning weight to each
    of the classes. This is particularly useful when you have an unbalanced training set.

    The `input` is expected to contain raw, unnormalized scores for each class.

    `input` has to be a Tensor of size either :math:`(minibatch, C)` or
    :math:`(minibatch, C, d_1, d_2, ..., d_K)` with :math:`K \geq 1` for the `K`-dimensional case
    (described later).

    This criterion expects a class index in the range :math:`[0, C-1]` as the `target` for each value
    of a 1D tensor of size `minibatch`; if `ignore_index` is specified, this criterion also accepts
    this class index (this index may not necessarily be in the class range).

    The loss can be described as:

    .. math::
        \text{loss}(x, class) = -\log\left(\frac{\exp(x[class])}{\sum_j \exp(x[j])}\right)
                              = -x[class] + \log\left(\sum_j \exp(x[j])\right)

    or in the case of the :attr:`weight` argument being specified:

    .. math::
        \text{loss}(x, class) = weight[class] \left(-x[class] + \log\left(\sum_j \exp(x[j])\right)\right)

    The losses are averaged across observations for each minibatch.

    Can also be used for higher dimension inputs, such as 2D images, by providing an input of size
    :math:`(minibatch, C, d_1, d_2, ..., d_K)` with :math:`K \geq 1`, where :math:`K` is the number of
    dimensions, and a target of appropriate shape (see below).

    Args:
        weight (Tensor, optional): a manual rescaling weight given to each class.
            If given, has to be a Tensor of size `C`
        size_average (bool, optional): Deprecated (see :attr:`reduction`). By default, the losses are
            averaged over each loss element in the batch. Note that for some losses, there are multiple
            elements per sample. If the field :attr:`size_average` is set to ``False``, the losses are
            instead summed for each minibatch. Ignored when :attr:`reduce` is ``False``. Default: ``True``
        ignore_index (int, optional): Specifies a target value that is ignored and does not contribute
            to the input gradient. When :attr:`size_average` is ``True``, the loss is averaged over
            non-ignored targets.
        reduce (bool, optional): Deprecated (see :attr:`reduction`). By default, the losses are averaged
            or summed over observations for each minibatch depending on :attr:`size_average`. When
            :attr:`reduce` is ``False``, returns a loss per batch element instead and ignores
            :attr:`size_average`. Default: ``True``
        reduction (string, optional): Specifies the reduction to apply to the output:
            ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied, ``'mean'``: the
            sum of the output will be divided by the number of elements in the output, ``'sum'``: the
            output will be summed. Note: :attr:`size_average` and :attr:`reduce` are in the process of
            being deprecated, and in the meantime, specifying either of those two args will override
            :attr:`reduction`. Default: ``'mean'``

    Shape:
        - Input: :math:`(N, C)` where `C = number of classes`, or
          :math:`(N, C, d_1, d_2, ..., d_K)` with :math:`K \geq 1` in the case of `K`-dimensional loss.
        - Target: :math:`(N)` where each value is :math:`0 \leq \text{targets}[i] \leq C-1`, or
          :math:`(N, d_1, d_2, ..., d_K)` with :math:`K \geq 1` in the case of K-dimensional loss.
        - Output: scalar.
          If :attr:`reduction` is ``'none'``, then the same size as the target: :math:`(N)`, or
          :math:`(N, d_1, d_2, ..., d_K)` with :math:`K \geq 1` in the case of K-dimensional loss.

    Examples::

        >>> loss = nn.CrossEntropyLoss()
        >>> input = torch.randn(3, 5, requires_grad=True)
        >>> target = torch.empty(3, dtype=torch.long).random_(5)
        >>> output = loss(input, target)
        >>> output.backward()
    """
    __constants__ = ['weight', 'ignore_index', 'reduction']
    def __init__(self, weight=None, size_average=None, ignore_index=-100,
                 reduce=None, reduction='mean'):
        super(CrossEntropyLoss, self).__init__(weight, size_average, reduce, reduction)
        self.ignore_index = ignore_index

    @weak_script_method
    def forward(self, input, target):
        return F.cross_entropy(input, target, weight=self.weight,
                               ignore_index=self.ignore_index, reduction=self.reduction)
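
# CrossEntropyLoss is exactly LogSoftmax followed by NLLLoss, as the docstring states:
#
#     >>> input = torch.randn(3, 5)
#     >>> target = torch.tensor([1, 0, 4])
#     >>> a = F.cross_entropy(input, target)
#     >>> b = F.nll_loss(F.log_softmax(input, dim=1), target)
#     >>> torch.allclose(a, b)
#     True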
937 r"""Creates a criterion that optimizes a multi-label one-versus-all 938 loss based on max-entropy, between input :math:`x` and target :math:`y` of size 940 For each sample in the minibatch: 943 loss(x, y) = - \frac{1}{C} * \sum_i y[i] * \log((1 + \exp(-x[i]))^{-1}) 944 + (1-y[i]) * \log\left(\frac{\exp(-x[i])}{(1 + \exp(-x[i]))}\right) 946 where :math:`i \in \left\{0, \; \cdots , \; \text{x.nElement}() - 1\right\}`, 947 :math:`y[i] \in \left\{0, \; 1\right\}`. 950 weight (Tensor, optional): a manual rescaling weight given to each 951 class. If given, it has to be a Tensor of size `C`. Otherwise, it is 952 treated as if having all ones. 953 size_average (bool, optional): Deprecated (see :attr:`reduction`). By default, 954 the losses are averaged over each loss element in the batch. Note that for 955 some losses, there are multiple elements per sample. If the field :attr:`size_average` 956 is set to ``False``, the losses are instead summed for each minibatch. Ignored 957 when reduce is ``False``. Default: ``True`` 958 reduce (bool, optional): Deprecated (see :attr:`reduction`). By default, the 959 losses are averaged or summed over observations for each minibatch depending 960 on :attr:`size_average`. When :attr:`reduce` is ``False``, returns a loss per 961 batch element instead and ignores :attr:`size_average`. Default: ``True`` 962 reduction (string, optional): Specifies the reduction to apply to the output: 963 ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied, 964 ``'mean'``: the sum of the output will be divided by the number of 965 elements in the output, ``'sum'``: the output will be summed. Note: :attr:`size_average` 966 and :attr:`reduce` are in the process of being deprecated, and in the meantime, 967 specifying either of those two args will override :attr:`reduction`. Default: ``'mean'`` 970 - Input: :math:`(N, C)` where `N` is the batch size and `C` is the number of classes. 971 - Target: :math:`(N, C)`, label targets padded by -1 ensuring same shape as the input. 972 - Output: scalar. If :attr:`reduction` is ``'none'``, then :math:`(N)`. 974 __constants__ = [
    'weight', 'reduction']

    def __init__(self, weight=None, size_average=None, reduce=None, reduction='mean'):
        super(MultiLabelSoftMarginLoss, self).__init__(weight, size_average, reduce, reduction)

    @weak_script_method
    def forward(self, input, target):
        return F.multilabel_soft_margin_loss(input, target, weight=self.weight, reduction=self.reduction)
986 r"""Creates a criterion that measures the loss given input tensors 987 :math:`x_1`, :math:`x_2` and a `Tensor` label :math:`y` with values 1 or -1. 988 This is used for measuring whether two inputs are similar or dissimilar, 989 using the cosine distance, and is typically used for learning nonlinear 990 embeddings or semi-supervised learning. 992 The loss function for each sample is: 997 1 - \cos(x_1, x_2), & \text{if } y = 1 \\ 998 \max(0, \cos(x_1, x_2) - \text{margin}), & \text{if } y = -1 1002 margin (float, optional): Should be a number from :math:`-1` to :math:`1`, 1003 :math:`0` to :math:`0.5` is suggested. If :attr:`margin` is missing, the 1004 default value is :math:`0`. 1005 size_average (bool, optional): Deprecated (see :attr:`reduction`). By default, 1006 the losses are averaged over each loss element in the batch. Note that for 1007 some losses, there are multiple elements per sample. If the field :attr:`size_average` 1008 is set to ``False``, the losses are instead summed for each minibatch. Ignored 1009 when reduce is ``False``. Default: ``True`` 1010 reduce (bool, optional): Deprecated (see :attr:`reduction`). By default, the 1011 losses are averaged or summed over observations for each minibatch depending 1012 on :attr:`size_average`. When :attr:`reduce` is ``False``, returns a loss per 1013 batch element instead and ignores :attr:`size_average`. Default: ``True`` 1014 reduction (string, optional): Specifies the reduction to apply to the output: 1015 ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied, 1016 ``'mean'``: the sum of the output will be divided by the number of 1017 elements in the output, ``'sum'``: the output will be summed. Note: :attr:`size_average` 1018 and :attr:`reduce` are in the process of being deprecated, and in the meantime, 1019 specifying either of those two args will override :attr:`reduction`. Default: ``'mean'`` 1021 __constants__ = [
    'margin', 'reduction']

    def __init__(self, margin=0., size_average=None, reduce=None, reduction='mean'):
        super(CosineEmbeddingLoss, self).__init__(size_average, reduce, reduction)
        self.margin = margin

    @weak_script_method
    def forward(self, input1, input2, target):
        return F.cosine_embedding_loss(input1, input2, target, margin=self.margin, reduction=self.reduction)
1034 r"""Creates a criterion that measures the loss given 1035 inputs :math:`x1`, :math:`x2`, two 1D mini-batch `Tensor`s, 1036 and a label 1D mini-batch tensor :math:`y` (containing 1 or -1). 1038 If :math:`y = 1` then it assumed the first input should be ranked higher 1039 (have a larger value) than the second input, and vice-versa for :math:`y = -1`. 1041 The loss function for each sample in the mini-batch is: 1044 \text{loss}(x, y) = \max(0, -y * (x1 - x2) + \text{margin}) 1047 margin (float, optional): Has a default value of :math:`0`. 1048 size_average (bool, optional): Deprecated (see :attr:`reduction`). By default, 1049 the losses are averaged over each loss element in the batch. Note that for 1050 some losses, there are multiple elements per sample. If the field :attr:`size_average` 1051 is set to ``False``, the losses are instead summed for each minibatch. Ignored 1052 when reduce is ``False``. Default: ``True`` 1053 reduce (bool, optional): Deprecated (see :attr:`reduction`). By default, the 1054 losses are averaged or summed over observations for each minibatch depending 1055 on :attr:`size_average`. When :attr:`reduce` is ``False``, returns a loss per 1056 batch element instead and ignores :attr:`size_average`. Default: ``True`` 1057 reduction (string, optional): Specifies the reduction to apply to the output: 1058 ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied, 1059 ``'mean'``: the sum of the output will be divided by the number of 1060 elements in the output, ``'sum'``: the output will be summed. Note: :attr:`size_average` 1061 and :attr:`reduce` are in the process of being deprecated, and in the meantime, 1062 specifying either of those two args will override :attr:`reduction`. Default: ``'mean'`` 1065 - Input: :math:`(N, D)` where `N` is the batch size and `D` is the size of a sample. 1066 - Target: :math:`(N)` 1067 - Output: scalar. If :attr:`reduction` is ``'none'``, then :math:`(N)`. 1069 __constants__ = [
    'margin', 'reduction']

    def __init__(self, margin=0., size_average=None, reduce=None, reduction='mean'):
        super(MarginRankingLoss, self).__init__(size_average, reduce, reduction)
        self.margin = margin

    @weak_script_method
    def forward(self, input1, input2, target):
        return F.margin_ranking_loss(input1, input2, target, margin=self.margin, reduction=self.reduction)
1082 r"""Creates a criterion that optimizes a multi-class classification hinge 1083 loss (margin-based loss) between input :math:`x` (a 2D mini-batch `Tensor`) and 1084 output :math:`y` (which is a 1D tensor of target class indices, 1085 :math:`0 \leq y \leq \text{x.size}(1)-1`): 1087 For each mini-batch sample, the loss in terms of the 1D input :math:`x` and scalar 1088 output :math:`y` is: 1091 \text{loss}(x, y) = \frac{\sum_i \max(0, \text{margin} - x[y] + x[i]))^p}{\text{x.size}(0)} 1093 where :math:`x \in \left\{0, \; \cdots , \; \text{x.size}(0) - 1\right\}` 1094 and :math:`i \neq y`. 1096 Optionally, you can give non-equal weighting on the classes by passing 1097 a 1D :attr:`weight` tensor into the constructor. 1099 The loss function then becomes: 1102 \text{loss}(x, y) = \frac{\sum_i \max(0, w[y] * (\text{margin} - x[y] + x[i]))^p)}{\text{x.size}(0)} 1105 p (int, optional): Has a default value of :math:`1`. :math:`1` and :math:`2` 1106 are the only supported values. 1107 margin (float, optional): Has a default value of :math:`1`. 1108 weight (Tensor, optional): a manual rescaling weight given to each 1109 class. If given, it has to be a Tensor of size `C`. Otherwise, it is 1110 treated as if having all ones. 1111 size_average (bool, optional): Deprecated (see :attr:`reduction`). By default, 1112 the losses are averaged over each loss element in the batch. Note that for 1113 some losses, there are multiple elements per sample. If the field :attr:`size_average` 1114 is set to ``False``, the losses are instead summed for each minibatch. Ignored 1115 when reduce is ``False``. Default: ``True`` 1116 reduce (bool, optional): Deprecated (see :attr:`reduction`). By default, the 1117 losses are averaged or summed over observations for each minibatch depending 1118 on :attr:`size_average`. When :attr:`reduce` is ``False``, returns a loss per 1119 batch element instead and ignores :attr:`size_average`. Default: ``True`` 1120 reduction (string, optional): Specifies the reduction to apply to the output: 1121 ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied, 1122 ``'mean'``: the sum of the output will be divided by the number of 1123 elements in the output, ``'sum'``: the output will be summed. Note: :attr:`size_average` 1124 and :attr:`reduce` are in the process of being deprecated, and in the meantime, 1125 specifying either of those two args will override :attr:`reduction`. Default: ``'mean'`` 1127 __constants__ = [
    'p', 'margin', 'weight', 'reduction']

    def __init__(self, p=1, margin=1., weight=None, size_average=None,
                 reduce=None, reduction='mean'):
        super(MultiMarginLoss, self).__init__(weight, size_average, reduce, reduction)
        if p != 1 and p != 2:
            raise ValueError("only p == 1 and p == 2 supported")
        assert weight is None or weight.dim() == 1
        self.p = p
        self.margin = margin

    @weak_script_method
    def forward(self, input, target):
        return F.multi_margin_loss(input, target, p=self.p, margin=self.margin,
                                   weight=self.weight, reduction=self.reduction)
1146 r"""Creates a criterion that measures the triplet loss given an input 1147 tensors :math:`x1`, :math:`x2`, :math:`x3` and a margin with a value greater than :math:`0`. 1148 This is used for measuring a relative similarity between samples. A triplet 1149 is composed by `a`, `p` and `n`: `anchor`, `positive examples` and `negative 1150 examples` respectively. The shapes of all input tensors should be 1153 The distance swap is described in detail in the paper `Learning shallow 1154 convolutional feature descriptors with triplet losses`_ by 1155 V. Balntas, E. Riba et al. 1157 The loss function for each sample in the mini-batch is: 1160 L(a, p, n) = \max \{d(a_i, p_i) - d(a_i, n_i) + {\rm margin}, 0\} 1166 d(x_i, y_i) = \left\lVert {\bf x}_i - {\bf y}_i \right\rVert_p 1169 margin (float, optional): Default: :math:`1`. 1170 p (int, optional): The norm degree for pairwise distance. Default: :math:`2`. 1171 swap (bool, optional): The distance swap is described in detail in the paper 1172 `Learning shallow convolutional feature descriptors with triplet losses` by 1173 V. Balntas, E. Riba et al. Default: ``False``. 1174 size_average (bool, optional): Deprecated (see :attr:`reduction`). By default, 1175 the losses are averaged over each loss element in the batch. Note that for 1176 some losses, there are multiple elements per sample. If the field :attr:`size_average` 1177 is set to ``False``, the losses are instead summed for each minibatch. Ignored 1178 when reduce is ``False``. Default: ``True`` 1179 reduce (bool, optional): Deprecated (see :attr:`reduction`). By default, the 1180 losses are averaged or summed over observations for each minibatch depending 1181 on :attr:`size_average`. When :attr:`reduce` is ``False``, returns a loss per 1182 batch element instead and ignores :attr:`size_average`. Default: ``True`` 1183 reduction (string, optional): Specifies the reduction to apply to the output: 1184 ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied, 1185 ``'mean'``: the sum of the output will be divided by the number of 1186 elements in the output, ``'sum'``: the output will be summed. Note: :attr:`size_average` 1187 and :attr:`reduce` are in the process of being deprecated, and in the meantime, 1188 specifying either of those two args will override :attr:`reduction`. Default: ``'mean'`` 1191 - Input: :math:`(N, D)` where :math:`D` is the vector dimension. 1192 - Output: scalar. If :attr:`reduction` is ``'none'``, then :math:`(N)`. 1194 >>> triplet_loss = nn.TripletMarginLoss(margin=1.0, p=2) 1195 >>> input1 = torch.randn(100, 128, requires_grad=True) 1196 >>> input2 = torch.randn(100, 128, requires_grad=True) 1197 >>> input3 = torch.randn(100, 128, requires_grad=True) 1198 >>> output = triplet_loss(input1, input2, input3) 1199 >>> output.backward() 1201 .. _Learning shallow convolutional feature descriptors with triplet losses: 1202 http://www.iis.ee.ic.ac.uk/%7Evbalnt/shallow_descr/TFeat_paper.pdf 1204 __constants__ = [
    'margin', 'p', 'eps', 'swap', 'reduction']

    def __init__(self, margin=1.0, p=2., eps=1e-6, swap=False, size_average=None,
                 reduce=None, reduction='mean'):
        super(TripletMarginLoss, self).__init__(size_average, reduce, reduction)
        self.margin = margin
        self.p = p
        self.eps = eps
        self.swap = swap

    @weak_script_method
    def forward(self, anchor, positive, negative):
        return F.triplet_margin_loss(anchor, positive, negative, margin=self.margin, p=self.p,
                                     eps=self.eps, swap=self.swap, reduction=self.reduction)
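
# The distance d(., .) above is the p-norm pairwise distance. A sketch of the reduced loss written
# with explicit ops (swap disabled); it should reproduce the functional form up to numerical
# tolerance:
#
#     >>> anchor, pos, neg = (torch.randn(10, 16) for _ in range(3))
#     >>> d_ap = F.pairwise_distance(anchor, pos, p=2)
#     >>> d_an = F.pairwise_distance(anchor, neg, p=2)
#     >>> manual = torch.clamp(d_ap - d_an + 1.0, min=0).mean()
#     >>> torch.allclose(manual, F.triplet_margin_loss(anchor, pos, neg, margin=1.0, p=2))
#     True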
@weak_module
class CTCLoss(_Loss):
    r"""The Connectionist Temporal Classification loss.

    Args:
        blank (int, optional): blank label. Default :math:`0`.
        reduction (string, optional): Specifies the reduction to apply to the output:
            ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied,
            ``'mean'``: the output losses will be divided by the target lengths and
            then the mean over the batch is taken. Default: ``'mean'``
        zero_infinity (bool, optional):
            Whether to zero infinite losses and the associated gradients.
            Default: ``False``
            Infinite losses mainly occur when the inputs are too short to be aligned to the targets.

    Inputs:
        log_probs: Tensor of size :math:`(T, N, C)` where `C = number of characters in alphabet
            including blank`, `T = input length`, and `N = batch size`.
            The logarithmized probabilities of the outputs
            (e.g. obtained with :func:`torch.nn.functional.log_softmax`).
        targets: Tensor of size :math:`(N, S)` or `(sum(target_lengths))`.
            Targets cannot be blank. In the second form, the targets are assumed to be concatenated.
        input_lengths: Tuple or tensor of size :math:`(N)`.
            Lengths of the inputs (must each be :math:`\leq T`)
        target_lengths: Tuple or tensor of size :math:`(N)`.
            Lengths of the targets

    Example::

        >>> ctc_loss = nn.CTCLoss()
        >>> log_probs = torch.randn(50, 16, 20).log_softmax(2).detach().requires_grad_()
        >>> targets = torch.randint(1, 20, (16, 30), dtype=torch.long)
        >>> input_lengths = torch.full((16,), 50, dtype=torch.long)
        >>> target_lengths = torch.randint(10, 30, (16,), dtype=torch.long)
        >>> loss = ctc_loss(log_probs, targets, input_lengths, target_lengths)
        >>> loss.backward()

    Reference:
        A. Graves et al.: Connectionist Temporal Classification:
        Labelling Unsegmented Sequence Data with Recurrent Neural Networks:
        https://www.cs.toronto.edu/~graves/icml_2006.pdf

    .. note::
        In order to use CuDNN, the following must be satisfied: :attr:`targets` must be
        in concatenated format, all :attr:`input_lengths` must be `T`. :math:`blank=0`,
        :attr:`target_lengths` :math:`\leq 256`, the integer arguments must be of
        dtype :attr:`torch.int32`.

        The regular implementation uses the (more common in PyTorch) `torch.long` dtype.

    .. include:: cudnn_deterministic.rst
    """
    __constants__ = ['blank', 'reduction']
    def __init__(self, blank=0, reduction='mean', zero_infinity=False):
        super(CTCLoss, self).__init__(reduction=reduction)
        self.blank = blank
        self.zero_infinity = zero_infinity

    @weak_script_method
    def forward(self, log_probs, targets, input_lengths, target_lengths):
        return F.ctc_loss(log_probs, targets, input_lengths, target_lengths, self.blank, self.reduction,
                          self.zero_infinity)
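
# Targets may also be supplied in concatenated 1D form, as the docstring notes (and as the CuDNN
# path requires). A sketch mirroring the padded example above:
#
#     >>> ctc_loss = nn.CTCLoss()
#     >>> log_probs = torch.randn(50, 16, 20).log_softmax(2).detach().requires_grad_()
#     >>> target_lengths = torch.randint(10, 30, (16,), dtype=torch.long)
#     >>> targets = torch.randint(1, 20, (int(target_lengths.sum()),), dtype=torch.long)
#     >>> input_lengths = torch.full((16,), 50, dtype=torch.long)
#     >>> loss = ctc_loss(log_probs, targets, input_lengths, target_lengths)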