import warnings
from .distance import PairwiseDistance
from .module import Module
from .. import functional as F
from .. import _reduction as _Reduction
from torch import Tensor
from typing import Callable, Optional
__all__ = ['L1Loss', 'NLLLoss', 'NLLLoss2d', 'PoissonNLLLoss', 'GaussianNLLLoss', 'KLDivLoss',
'MSELoss', 'BCELoss', 'BCEWithLogitsLoss', 'HingeEmbeddingLoss', 'MultiLabelMarginLoss',
'SmoothL1Loss', 'HuberLoss', 'SoftMarginLoss', 'CrossEntropyLoss', 'MultiLabelSoftMarginLoss',
'CosineEmbeddingLoss', 'MarginRankingLoss', 'MultiMarginLoss', 'TripletMarginLoss',
'TripletMarginWithDistanceLoss', 'CTCLoss']
class _Loss(Module):
reduction: str
def __init__(self, size_average=None, reduce=None, reduction: str = 'mean') -> None:
super().__init__()
if size_average is not None or reduce is not None:
self.reduction: str = _Reduction.legacy_get_string(size_average, reduce)
else:
self.reduction = reduction
class _WeightedLoss(_Loss):
def __init__(self, weight: Optional[Tensor] = None, size_average=None, reduce=None, reduction: str = 'mean') -> None:
super().__init__(size_average, reduce, reduction)
self.register_buffer('weight', weight)
self.weight: Optional[Tensor]
class L1Loss(_Loss):
r"""Creates a criterion that measures the mean absolute error (MAE) between each element in
the input :math:`x` and target :math:`y`.
The unreduced (i.e. with :attr:`reduction` set to ``'none'``) loss can be described as:
.. math::
\ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad
l_n = \left| x_n - y_n \right|,
where :math:`N` is the batch size. If :attr:`reduction` is not ``'none'``
(default ``'mean'``), then:
.. math::
\ell(x, y) =
\begin{cases}
\operatorname{mean}(L), & \text{if reduction} = \text{`mean';}\\
\operatorname{sum}(L), & \text{if reduction} = \text{`sum'.}
\end{cases}
:math:`x` and :math:`y` are tensors of arbitrary shapes with a total
of :math:`n` elements each.
The sum operation still operates over all the elements, and divides by :math:`n`.
The division by :math:`n` can be avoided if one sets ``reduction = 'sum'``.
Supports real-valued and complex-valued inputs.
Args:
size_average (bool, optional): Deprecated (see :attr:`reduction`). By default,
the losses are averaged over each loss element in the batch. Note that for
some losses, there are multiple elements per sample. If the field :attr:`size_average`
is set to ``False``, the losses are instead summed for each minibatch. Ignored
when :attr:`reduce` is ``False``. Default: ``True``
reduce (bool, optional): Deprecated (see :attr:`reduction`). By default, the
losses are averaged or summed over observations for each minibatch depending
on :attr:`size_average`. When :attr:`reduce` is ``False``, returns a loss per
batch element instead and ignores :attr:`size_average`. Default: ``True``
reduction (str, optional): Specifies the reduction to apply to the output:
``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied,
``'mean'``: the sum of the output will be divided by the number of
elements in the output, ``'sum'``: the output will be summed. Note: :attr:`size_average`
and :attr:`reduce` are in the process of being deprecated, and in the meantime,
specifying either of those two args will override :attr:`reduction`. Default: ``'mean'``
Shape:
- Input: :math:`(*)`, where :math:`*` means any number of dimensions.
- Target: :math:`(*)`, same shape as the input.
- Output: scalar. If :attr:`reduction` is ``'none'``, then
:math:`(*)`, same shape as the input.
Examples::
>>> loss = nn.L1Loss()
>>> input = torch.randn(3, 5, requires_grad=True)
>>> target = torch.randn(3, 5)
>>> output = loss(input, target)
>>> output.backward()
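A minimal illustrative check of the reduction modes described above (hand-picked values, not part of the API):
>>> a = torch.tensor([1.0, 2.0, 3.0])
>>> b = torch.tensor([1.5, 2.5, 2.0])
>>> nn.L1Loss(reduction='none')(a, b)   # elementwise |a - b|
tensor([0.5000, 0.5000, 1.0000])
>>> nn.L1Loss(reduction='sum')(a, b)    # 0.5 + 0.5 + 1.0
tensor(2.)
>>> nn.L1Loss(reduction='mean')(a, b)   # 2.0 / 3
tensor(0.6667)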
"""
__constants__ = ['reduction']
def __init__(self, size_average=None, reduce=None, reduction: str = 'mean') -> None:
super().__init__(size_average, reduce, reduction)
def forward(self, input: Tensor, target: Tensor) -> Tensor:
return F.l1_loss(input, target, reduction=self.reduction)
class NLLLoss(_WeightedLoss):
r"""The negative log likelihood loss. It is useful to train a classification
problem with `C` classes.
If provided, the optional argument :attr:`weight` should be a 1D Tensor assigning
weight to each of the classes. This is particularly useful when you have an
unbalanced training set.
The `input` given through a forward call is expected to contain
log-probabilities of each class. `input` has to be a Tensor of size either
:math:`(minibatch, C)` or :math:`(minibatch, C, d_1, d_2, ..., d_K)`
with :math:`K \geq 1` for the `K`-dimensional case. The latter is useful for
higher dimension inputs, such as computing NLL loss per-pixel for 2D images.
Obtaining log-probabilities in a neural network is easily achieved by
adding a `LogSoftmax` layer as the last layer of your network.
You may use `CrossEntropyLoss` instead, if you prefer not to add an extra
layer.
The `target` that this loss expects should be a class index in the range :math:`[0, C-1]`
where `C = number of classes`; if `ignore_index` is specified, this loss also accepts
this class index (this index may not necessarily be in the class range).
The unreduced (i.e. with :attr:`reduction` set to ``'none'``) loss can be described as:
.. math::
\ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad
l_n = - w_{y_n} x_{n,y_n}, \quad
w_{c} = \text{weight}[c] \cdot \mathbb{1}\{c \not= \text{ignore\_index}\},
where :math:`x` is the input, :math:`y` is the target, :math:`w` is the weight, and
:math:`N` is the batch size. If :attr:`reduction` is not ``'none'``
(default ``'mean'``), then
.. math::
\ell(x, y) = \begin{cases}
\sum_{n=1}^N \frac{1}{\sum_{n=1}^N w_{y_n}} l_n, &
\text{if reduction} = \text{`mean';}\\
\sum_{n=1}^N l_n, &
\text{if reduction} = \text{`sum'.}
\end{cases}
Args:
weight (Tensor, optional): a manual rescaling weight given to each
class. If given, it has to be a Tensor of size `C`. Otherwise, it is
treated as if having all ones.
size_average (bool, optional): Deprecated (see :attr:`reduction`). By default,
the losses are averaged over each loss element in the batch. Note that for
some losses, there are multiple elements per sample. If the field :attr:`size_average`
is set to ``False``, the losses are instead summed for each minibatch. Ignored
when :attr:`reduce` is ``False``. Default: ``None``
ignore_index (int, optional): Specifies a target value that is ignored
and does not contribute to the input gradient. When
:attr:`size_average` is ``True``, the loss is averaged over
non-ignored targets.
reduce (bool, optional): Deprecated (see :attr:`reduction`). By default, the
losses are averaged or summed over observations for each minibatch depending
on :attr:`size_average`. When :attr:`reduce` is ``False``, returns a loss per
batch element instead and ignores :attr:`size_average`. Default: ``None``
reduction (str, optional): Specifies the reduction to apply to the output:
``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will
be applied, ``'mean'``: the weighted mean of the output is taken,
``'sum'``: the output will be summed. Note: :attr:`size_average`
and :attr:`reduce` are in the process of being deprecated, and in
the meantime, specifying either of those two args will override
:attr:`reduction`. Default: ``'mean'``
Shape:
- Input: :math:`(N, C)` or :math:`(C)`, where `C = number of classes`, or
:math:`(N, C, d_1, d_2, ..., d_K)` with :math:`K \geq 1`
in the case of `K`-dimensional loss.
- Target: :math:`(N)` or :math:`()`, where each value is
:math:`0 \leq \text{targets}[i] \leq C-1`, or
:math:`(N, d_1, d_2, ..., d_K)` with :math:`K \geq 1` in the case of
K-dimensional loss.
- Output: If :attr:`reduction` is ``'none'``, shape :math:`(N)` or
:math:`(N, d_1, d_2, ..., d_K)` with :math:`K \geq 1` in the case of K-dimensional loss.
Otherwise, scalar.
Examples::
>>> m = nn.LogSoftmax(dim=1)
>>> loss = nn.NLLLoss()
>>> # input is of size N x C = 3 x 5
>>> input = torch.randn(3, 5, requires_grad=True)
>>> # each element in target has to have 0 <= value < C
>>> target = torch.tensor([1, 0, 4])
>>> output = loss(m(input), target)
>>> output.backward()
>>>
>>>
>>> # 2D loss example (used, for example, with image inputs)
>>> N, C = 5, 4
>>> loss = nn.NLLLoss()
>>> # input is of size N x C x height x width
>>> data = torch.randn(N, 16, 10, 10)
>>> conv = nn.Conv2d(16, C, (3, 3))
>>> m = nn.LogSoftmax(dim=1)
>>> # each element in target has to have 0 <= value < C
>>> target = torch.empty(N, 8, 8, dtype=torch.long).random_(0, C)
>>> output = loss(m(conv(data)), target)
>>> output.backward()
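A further sketch (illustrative only) of the per-class ``weight`` and ``ignore_index`` behaviour described above:
>>> # the 'mean' reduction divides by the sum of the selected weights, as in the formula
>>> weight = torch.tensor([1.0, 2.0, 1.0, 1.0, 1.0])
>>> loss = nn.NLLLoss(weight=weight, ignore_index=0)
>>> log_probs = nn.LogSoftmax(dim=1)(torch.randn(3, 5, requires_grad=True))
>>> target = torch.tensor([1, 0, 4])   # the second sample (class 0) is ignored
>>> output = loss(log_probs, target)
>>> output.backward()
>>> # CrossEntropyLoss combines LogSoftmax and NLLLoss, as mentioned above
>>> logits = torch.randn(3, 5)
>>> torch.allclose(nn.CrossEntropyLoss()(logits, target), nn.NLLLoss()(nn.LogSoftmax(dim=1)(logits), target))
True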
"""
__constants__ = ['ignore_index', 'reduction']
ignore_index: int
def __init__(self, weight: Optional[Tensor] = None, size_average=None, ignore_index: int = -100,
reduce=None, reduction: str = 'mean') -> None:
super().__init__(weight, size_average, reduce, reduction)
self.ignore_index = ignore_index
def forward(self, input: Tensor, target: Tensor) -> Tensor:
return F.nll_loss(input, target, weight=self.weight, ignore_index=self.ignore_index, reduction=self.reduction)
class NLLLoss2d(NLLLoss):
def __init__(self, weight: Optional[Tensor] = None, size_average=None, ignore_index: int = -100,
reduce=None, reduction: str = 'mean') -> None:
warnings.warn("NLLLoss2d has been deprecated. "
"Please use NLLLoss instead as a drop-in replacement and see "
"https://pytorch.org/docs/master/nn.html#torch.nn.NLLLoss for more details.")
super().__init__(weight, size_average, ignore_index, reduce, reduction)
class PoissonNLLLoss(_Loss):
r"""Negative log likelihood loss with Poisson distribution of target.
The loss can be described as:
.. math::
\text{target} \sim \mathrm{Poisson}(\text{input})
\text{loss}(\text{input}, \text{target}) = \text{input} - \text{target} * \log(\text{input}) + \log(\text{target!})
The last term can be omitted or approximated with the Stirling formula. The
approximation is used for target values greater than 1. For targets less than
or equal to 1, zeros are added to the loss.
Args:
log_input (bool, optional): if ``True`` the loss is computed as
:math:`\exp(\text{input}) - \text{target}*\text{input}`, if ``False`` the loss is
:math:`\text{input} - \text{target}*\log(\text{input}+\text{eps})`.
full (bool, optional): whether to compute the full loss, i.e. to add the
Stirling approximation term
.. math::
\text{target}*\log(\text{target}) - \text{target} + 0.5 * \log(2\pi\text{target}).
size_average (bool, optional): Deprecated (see :attr:`reduction`). By default,
the losses are averaged over each loss element in the batch. Note that for
some losses, there are multiple elements per sample. If the field :attr:`size_average`
is set to ``False``, the losses are instead summed for each minibatch. Ignored
when :attr:`reduce` is ``False``. Default: ``True``
eps (float, optional): Small value to avoid evaluation of :math:`\log(0)` when
:attr:`log_input = False`. Default: 1e-8
reduce (bool, optional): Deprecated (see :attr:`reduction`). By default, the
losses are averaged or summed over observations for each minibatch depending
on :attr:`size_average`. When :attr:`reduce` is ``False``, returns a loss per
batch element instead and ignores :attr:`size_average`. Default: ``True``
reduction (str, optional): Specifies the reduction to apply to the output:
``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied,
``'mean'``: the sum of the output will be divided by the number of
elements in the output, ``'sum'``: the output will be summed. Note: :attr:`size_average`
and :attr:`reduce` are in the process of being deprecated, and in the meantime,
specifying either of those two args will override :attr:`reduction`. Default: ``'mean'``
Examples::
>>> loss = nn.PoissonNLLLoss()
>>> log_input = torch.randn(5, 2, requires_grad=True)
>>> target = torch.randn(5, 2)
>>> output = loss(log_input, target)
>>> output.backward()
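A sketch (illustrative only) of the ``log_input=False`` variant described above, where the input is a rate rather than a log-rate:
>>> loss = nn.PoissonNLLLoss(log_input=False)
>>> rate = torch.rand(5, 2, requires_grad=True) + 0.1   # strictly positive rates
>>> target = torch.poisson(torch.ones(5, 2) * 2.0)      # integer-valued Poisson samples
>>> output = loss(rate, target)
>>> output.backward()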
Shape:
- Input: :math:`(*)`, where :math:`*` means any number of dimensions.
- Target: :math:`(*)`, same shape as the input.
- Output: scalar by default. If :attr:`reduction` is ``'none'``, then :math:`(*)`,
the same shape as the input.
"""
__constants__ = ['log_input', 'full', 'eps', 'reduction']
log_input: bool
full: bool
eps: float
def __init__(self, log_input: bool = True, full: bool = False, size_average=None,
eps: float = 1e-8, reduce=None, reduction: str = 'mean') -> None:
super().__init__(size_average, reduce, reduction)
self.log_input = log_input
self.full = full
self.eps = eps
def forward(self, log_input: Tensor, target: Tensor) -> Tensor:
return F.poisson_nll_loss(log_input, target, log_input=self.log_input, full=self.full,
eps=self.eps, reduction=self.reduction)
class GaussianNLLLoss(_Loss):
r"""Gaussian negative log likelihood loss.
The targets are treated as samples from Gaussian distributions with
expectations and variances predicted by the neural network. For a
``target`` tensor modelled as having a Gaussian distribution with a tensor
of expectations ``input`` and a tensor of positive variances ``var``, the loss is:
.. math::
\text{loss} = \frac{1}{2}\left(\log\left(\text{max}\left(\text{var},
\ \text{eps}\right)\right) + \frac{\left(\text{input} - \text{target}\right)^2}
{\text{max}\left(\text{var}, \ \text{eps}\right)}\right) + \text{const.}
where :attr:`eps` is used for stability. By default, the constant term of
the loss function is omitted unless :attr:`full` is ``True``. If ``var`` is not the same
size as ``input`` (due to a homoscedastic assumption), it must either have a final dimension
of 1 or have one fewer dimension (with all other sizes being the same) for correct broadcasting.
Args:
full (bool, optional): include the constant term in the loss
calculation. Default: ``False``.
eps (float, optional): value used to clamp ``var`` (see note below), for
stability. Default: 1e-6.
reduction (str, optional): specifies the reduction to apply to the
output:``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction
will be applied, ``'mean'``: the output is the average of all batch
member losses, ``'sum'``: the output is the sum of all batch member
losses. Default: ``'mean'``.
Shape:
- Input: :math:`(N, *)` or :math:`(*)` where :math:`*` means any number of additional
dimensions
- Target: :math:`(N, *)` or :math:`(*)`, same shape as the input, or same shape as the input
but with one dimension equal to 1 (to allow for broadcasting)
- Var: :math:`(N, *)` or :math:`(*)`, same shape as the input, or same shape as the input but
with one dimension equal to 1, or same shape as the input but with one fewer
dimension (to allow for broadcasting)
- Output: scalar if :attr:`reduction` is ``'mean'`` (default) or
``'sum'``. If :attr:`reduction` is ``'none'``, then :math:`(N, *)`, same
shape as the input
Examples::
>>> loss = nn.GaussianNLLLoss()
>>> input = torch.randn(5, 2, requires_grad=True)
>>> target = torch.randn(5, 2)
>>> var = torch.ones(5, 2, requires_grad=True)   # heteroscedastic: one variance per element
>>> output = loss(input, target, var)
>>> output.backward()
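A second, homoscedastic sketch (illustrative only; ``var`` broadcasts against the input as described above):
>>> loss = nn.GaussianNLLLoss()
>>> input = torch.randn(5, 10, requires_grad=True)
>>> target = torch.randn(5, 10)
>>> var = torch.ones(5, 1, requires_grad=True)   # one shared variance per sample
>>> output = loss(input, target, var)
>>> output.backward()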