
Commit b635ecf

Merge pull request #47 from BloodAxe/develop
Release 0.4.0
2 parents: 1054a4f + f932d16

31 files changed: 972 additions, 893 deletions

README.md

Lines changed: 1 addition & 1 deletion

@@ -139,7 +139,7 @@ from pytorch_toolbelt import losses as L
 
 # Creates a loss function that is a weighted sum of focal loss
 # and lovasz loss with weigths 1.0 and 0.5 accordingly.
-loss = L.JointLoss(L.FocalLoss(), 1.0, L.LovaszLoss(), 0.5)
+loss = L.JointLoss(L.FocalLoss(), L.LovaszLoss(), 1.0, 0.5)
 ```
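For reference, the corrected call matches the JointLoss signature updated later in this commit (pytorch_toolbelt/losses/joint_loss.py): the two loss modules come first, their weights after. A hedged equivalent with explicit keyword arguments:

```python
from pytorch_toolbelt import losses as L

# Same call as the fixed README line, spelled out with the keyword names
# from the JointLoss signature shown in the joint_loss.py diff below.
loss = L.JointLoss(
    first=L.FocalLoss(),
    second=L.LovaszLoss(),
    first_weight=1.0,
    second_weight=0.5,
)
```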

pytorch_toolbelt/__init__.py

Lines changed: 1 addition & 1 deletion

@@ -1,3 +1,3 @@
 from __future__ import absolute_import
 
-__version__ = "0.3.2"
+__version__ = "0.4.0"
pytorch_toolbelt/inference/ensembling.py

Lines changed: 31 additions & 16 deletions

@@ -5,39 +5,54 @@
 
 
 class ApplySoftmaxTo(nn.Module):
-    def __init__(self, model, output_key: Union[str, List[str]] = "logits", dim=1):
+    def __init__(self, model: nn.Module, output_key: Union[str, List[str]] = "logits", dim=1, temperature=1):
+        """
+        Apply softmax activation on given output(s) of the model
+        :param model: Model to wrap
+        :param output_key: string or list of strings, indicating to what outputs softmax activation should be applied.
+        :param dim: Tensor dimension for softmax activation
+        :param temperature: Temperature scaling coefficient. Values > 1 will make logits sharper.
+        """
         super().__init__()
         output_key = output_key if isinstance(output_key, (list, tuple)) else [output_key]
         # By converting to set, we prevent double-activation by passing output_key=["logits", "logits"]
         self.output_keys = set(output_key)
         self.model = model
         self.dim = dim
+        self.temperature = temperature
 
-    def forward(self, input):
-        output = self.model(input)
+    def forward(self, *input, **kwargs):
+        output = self.model(*input, **kwargs)
         for key in self.output_keys:
-            output[key] = output[key].softmax(dim=1)
+            output[key] = output[key].mul(self.temperature).softmax(dim=1)
         return output
 
 
 class ApplySigmoidTo(nn.Module):
-    def __init__(self, model, output_key: Union[str, List[str]] = "logits"):
+    def __init__(self, model: nn.Module, output_key: Union[str, List[str]] = "logits", temperature=1):
+        """
+        Apply sigmoid activation on given output(s) of the model
+        :param model: Model to wrap
+        :param output_key: string or list of strings, indicating to what outputs sigmoid activation should be applied.
+        :param temperature: Temperature scaling coefficient. Values > 1 will make logits sharper.
+        """
         super().__init__()
         output_key = output_key if isinstance(output_key, (list, tuple)) else [output_key]
         # By converting to set, we prevent double-activation by passing output_key=["logits", "logits"]
        self.output_keys = set(output_key)
         self.model = model
+        self.temperature = temperature
 
-    def forward(self, input):  # skipcq: PYL-W0221
-        output = self.model(input)
+    def forward(self, *input, **kwargs):  # skipcq: PYL-W0221
+        output = self.model(*input, **kwargs)
         for key in self.output_keys:
-            output[key] = output[key].sigmoid()
+            output[key] = output[key].mul(self.temperature).sigmoid()
         return output
 
 
 class Ensembler(nn.Module):
     """
-    Computes sum of outputs for several models with arithmetic averaging (optional).
+    Compute sum (or average) of outputs of several models.
     """
 
     def __init__(self, models: List[nn.Module], average=True, outputs=None):

@@ -53,8 +68,8 @@ def __init__(self, models: List[nn.Module], average=True, outputs=None):
         self.models = nn.ModuleList(models)
         self.average = average
 
-    def forward(self, x):  # skipcq: PYL-W0221
-        output_0 = self.models[0](x)
+    def forward(self, *input, **kwargs):  # skipcq: PYL-W0221
+        output_0 = self.models[0](*input, **kwargs)
         num_models = len(self.models)
 
         if self.outputs:

@@ -63,15 +78,15 @@ def forward(self, x):  # skipcq: PYL-W0221
             keys = output_0.keys()
 
         for index in range(1, num_models):
-            output_i = self.models[index](x)
+            output_i = self.models[index](*input, **kwargs)
 
             # Sum outputs
             for key in keys:
-                output_0[key] += output_i[key]
+                output_0[key].add_(output_i[key])
 
         if self.average:
             for key in keys:
-                output_0[key] /= num_models
+                output_0[key].mul_(1.0 / num_models)
 
         return output_0
 

@@ -86,6 +101,6 @@ def __init__(self, model: nn.Module, key: str):
         self.model = model
         self.target_key = key
 
-    def forward(self, input) -> Tensor:
-        output = self.model(input)
+    def forward(self, *input, **kwargs) -> Tensor:
+        output = self.model(*input, **kwargs)
         return output[self.target_key]
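For orientation, a minimal usage sketch of the updated wrappers. The DummyModel, its "logits" output key, and the tensor shapes are illustrative assumptions, not part of this commit:

```python
import torch
from torch import nn

from pytorch_toolbelt.inference.ensembling import ApplySigmoidTo, Ensembler


class DummyModel(nn.Module):
    """Toy model returning a dict of outputs, as the wrappers expect."""

    def __init__(self):
        super().__init__()
        self.conv = nn.Conv2d(3, 1, kernel_size=1)

    def forward(self, x):
        return {"logits": self.conv(x)}


# Each wrapped model multiplies its logits by `temperature` before the sigmoid
# (new in this release) and now forwards *input/**kwargs to the inner model.
models = [ApplySigmoidTo(DummyModel(), output_key="logits", temperature=2) for _ in range(3)]

# Ensembler sums the dict outputs in-place and averages them when average=True.
ensemble = Ensembler(models, average=True)

with torch.no_grad():
    probs = ensemble(torch.randn(2, 3, 32, 32))["logits"]

print(probs.shape)  # torch.Size([2, 1, 32, 32])
```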

pytorch_toolbelt/losses/dice.py

Lines changed: 2 additions & 2 deletions

@@ -82,12 +82,12 @@ def forward(self, y_pred: Tensor, y_true: Tensor) -> Tensor:
         y_true = y_true.view(bs, num_classes, -1)
         y_pred = y_pred.view(bs, num_classes, -1)
 
-        scores = soft_dice_score(y_pred, y_true.type_as(y_pred), self.smooth, self.eps, dims=dims)
+        scores = soft_dice_score(y_pred, y_true.type_as(y_pred), smooth=self.smooth, eps=self.eps, dims=dims)
 
         if self.log_loss:
             loss = -torch.log(scores.clamp_min(self.eps))
         else:
-            loss = 1 - scores
+            loss = 1.0 - scores
 
         # Dice loss is undefined for non-empty classes
         # So we zero contribution of channel that does not have true pixels
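For orientation, a short hedged call of the loss whose forward pass changed above. The mode="binary" argument and tensor shapes are assumptions about the DiceLoss constructor, which this diff does not show:

```python
import torch

from pytorch_toolbelt.losses import DiceLoss

criterion = DiceLoss(mode="binary")                    # assumed constructor argument
logits = torch.randn(4, 1, 64, 64)                     # raw model predictions
target = torch.randint(0, 2, (4, 1, 64, 64)).float()   # binary ground-truth mask

print(criterion(logits, target).item())
```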

pytorch_toolbelt/losses/functional.py

Lines changed: 88 additions & 33 deletions

@@ -8,21 +8,25 @@
 
 
 def focal_loss_with_logits(
-    input: torch.Tensor,
+    output: torch.Tensor,
     target: torch.Tensor,
-    gamma=2.0,
+    gamma: float = 2.0,
     alpha: Optional[float] = 0.25,
-    reduction="mean",
-    normalized=False,
+    reduction: str = "mean",
+    normalized: bool = False,
     reduced_threshold: Optional[float] = None,
+    eps: float = 1e-6,
 ) -> torch.Tensor:
     """Compute binary focal loss between target and output logits.
 
     See :class:`~pytorch_toolbelt.losses.FocalLoss` for details.
 
     Args:
-        input: Tensor of arbitrary shape
+        output: Tensor of arbitrary shape (predictions of the model)
         target: Tensor of the same shape as input
+        gamma: Focal loss power factor
+        alpha: Weight factor to balance positive and negative samples. Alpha must be in [0...1] range,
+            high values will give more weight to positive class.
         reduction (string, optional): Specifies the reduction to apply to the output:
             'none' | 'mean' | 'sum' | 'batchwise_mean'. 'none': no reduction will be applied,
             'mean': the sum of the output will be divided by the number of

@@ -32,18 +36,18 @@ def focal_loss_with_logits(
             'batchwise_mean' computes mean loss per sample in batch. Default: 'mean'
         normalized (bool): Compute normalized focal loss (https://arxiv.org/pdf/1909.07829.pdf).
         reduced_threshold (float, optional): Compute reduced focal loss (https://arxiv.org/abs/1903.01347).
-    References::
 
+    References:
         https://github.com/open-mmlab/mmdetection/blob/master/mmdet/core/loss/losses.py
     """
-    target = target.type(input.type())
+    target = target.type(output.type())
 
-    logpt = F.binary_cross_entropy_with_logits(input, target, reduction="none")
+    logpt = F.binary_cross_entropy_with_logits(output, target, reduction="none")
     pt = torch.exp(-logpt)
 
     # compute the loss
     if reduced_threshold is None:
-        focal_term = (1 - pt).pow(gamma)
+        focal_term = (1.0 - pt).pow(gamma)
     else:
         focal_term = ((1.0 - pt) / reduced_threshold).pow(gamma)
         focal_term[pt < reduced_threshold] = 1

@@ -54,7 +58,7 @@ def focal_loss_with_logits(
         loss *= alpha * target + (1 - alpha) * (1 - target)
 
     if normalized:
-        norm_factor = focal_term.sum() + 1e-5
+        norm_factor = focal_term.sum().clamp_min(eps)
         loss /= norm_factor
 
     if reduction == "mean":

@@ -72,19 +76,22 @@
 
 
 # TODO: Mark as deprecated and emit warning
-def reduced_focal_loss(input: torch.Tensor, target: torch.Tensor, threshold=0.5, gamma=2.0, reduction="mean"):
+def reduced_focal_loss(output: torch.Tensor, target: torch.Tensor, threshold=0.5, gamma=2.0, reduction="mean"):
     return focal_loss_with_logits(
-        input, target, alpha=None, gamma=gamma, reduction=reduction, reduced_threshold=threshold
+        output, target, alpha=None, gamma=gamma, reduction=reduction, reduced_threshold=threshold
     )
 
 
-def soft_jaccard_score(y_pred: torch.Tensor, y_true: torch.Tensor, smooth=0.0, eps=1e-7, dims=None) -> torch.Tensor:
+def soft_jaccard_score(
+    output: torch.Tensor, target: torch.Tensor, smooth: float = 0.0, eps: float = 1e-7, dims=None
+) -> torch.Tensor:
     """
 
-    :param y_pred:
-    :param y_true:
+    :param output:
+    :param target:
     :param smooth:
     :param eps:
+    :param dims:
     :return:
 
     Shape:

@@ -94,25 +101,27 @@ def soft_jaccard_score(y_pred: torch.Tensor, y_true: torch.Tensor, smooth=0.0, e
         - Output: scalar.
 
     """
-    assert y_pred.size() == y_true.size()
+    assert output.size() == target.size()
 
     if dims is not None:
-        intersection = torch.sum(y_pred * y_true, dim=dims)
-        cardinality = torch.sum(y_pred + y_true, dim=dims)
+        intersection = torch.sum(output * target, dim=dims)
+        cardinality = torch.sum(output + target, dim=dims)
     else:
-        intersection = torch.sum(y_pred * y_true)
-        cardinality = torch.sum(y_pred + y_true)
+        intersection = torch.sum(output * target)
+        cardinality = torch.sum(output + target)
 
     union = cardinality - intersection
-    jaccard_score = (intersection + smooth) / (union.clamp_min(eps) + smooth)
+    jaccard_score = (intersection + smooth) / (union + smooth).clamp_min(eps)
     return jaccard_score
 
 
-def soft_dice_score(y_pred: torch.Tensor, y_true: torch.Tensor, smooth=0, eps=1e-7, dims=None) -> torch.Tensor:
+def soft_dice_score(
+    output: torch.Tensor, target: torch.Tensor, smooth: float = 0.0, eps: float = 1e-7, dims=None
+) -> torch.Tensor:
     """
 
-    :param y_pred:
-    :param y_true:
+    :param output:
+    :param target:
     :param smooth:
     :param eps:
     :return:

@@ -124,28 +133,28 @@ def soft_dice_score(y_pred: torch.Tensor, y_true: torch.Tensor, smooth=0, eps=1e
         - Output: scalar.
 
     """
-    assert y_pred.size() == y_true.size()
+    assert output.size() == target.size()
     if dims is not None:
-        intersection = torch.sum(y_pred * y_true, dim=dims)
-        cardinality = torch.sum(y_pred + y_true, dim=dims)
+        intersection = torch.sum(output * target, dim=dims)
+        cardinality = torch.sum(output + target, dim=dims)
     else:
-        intersection = torch.sum(y_pred * y_true)
-        cardinality = torch.sum(y_pred + y_true)
-    dice_score = (2.0 * intersection + smooth) / (cardinality.clamp_min(eps) + smooth)
+        intersection = torch.sum(output * target)
+        cardinality = torch.sum(output + target)
+    dice_score = (2.0 * intersection + smooth) / (cardinality + smooth).clamp_min(eps)
     return dice_score
 
 
-def wing_loss(prediction: torch.Tensor, target: torch.Tensor, width=5, curvature=0.5, reduction="mean"):
+def wing_loss(output: torch.Tensor, target: torch.Tensor, width=5, curvature=0.5, reduction="mean"):
     """
     https://arxiv.org/pdf/1711.06753.pdf
-    :param prediction:
+    :param output:
     :param target:
     :param width:
    :param curvature:
     :param reduction:
     :return:
     """
-    diff_abs = (target - prediction).abs()
+    diff_abs = (target - output).abs()
     loss = diff_abs.clone()
 
     idx_smaller = diff_abs < width

@@ -163,3 +172,49 @@ def wing_loss(prediction: torch.Tensor, target: torch.Tensor, width=5, curvature
         loss = loss.mean()
 
     return loss
+
+
+def label_smoothed_nll_loss(
+    lprobs: torch.Tensor, target: torch.Tensor, epsilon: float, ignore_index=None, reduction="mean", dim=-1
+) -> torch.Tensor:
+    """
+
+    Source: https://github.com/pytorch/fairseq/blob/master/fairseq/criterions/label_smoothed_cross_entropy.py
+
+    :param lprobs: Log-probabilities of predictions (e.g after log_softmax)
+    :param target:
+    :param epsilon:
+    :param ignore_index:
+    :param reduction:
+    :return:
+    """
+    if target.dim() == lprobs.dim() - 1:
+        target = target.unsqueeze(dim)
+
+    if ignore_index is not None:
+        pad_mask = target.eq(ignore_index)
+        target = target.masked_fill(pad_mask, 0)
+        nll_loss = -lprobs.gather(dim=dim, index=target)
+        smooth_loss = -lprobs.sum(dim=dim, keepdim=True)
+
+        # nll_loss.masked_fill_(pad_mask, 0.0)
+        # smooth_loss.masked_fill_(pad_mask, 0.0)
+        nll_loss = nll_loss.masked_fill(pad_mask, 0.0)
+        smooth_loss = smooth_loss.masked_fill(pad_mask, 0.0)
+    else:
+        nll_loss = -lprobs.gather(dim=dim, index=target)
+        smooth_loss = -lprobs.sum(dim=dim, keepdim=True)
+
+    nll_loss = nll_loss.squeeze(dim)
+    smooth_loss = smooth_loss.squeeze(dim)
+
+    if reduction == "sum":
+        nll_loss = nll_loss.sum()
+        smooth_loss = smooth_loss.sum()
+    if reduction == "mean":
+        nll_loss = nll_loss.mean()
+        smooth_loss = smooth_loss.mean()
+
+    eps_i = epsilon / lprobs.size(dim)
+    loss = (1.0 - epsilon) * nll_loss + eps_i * smooth_loss
+    return loss
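To make the renamed signatures concrete, a short hedged sketch of calling the updated functions directly; the tensor shapes and values are illustrative assumptions:

```python
import torch

from pytorch_toolbelt.losses.functional import (
    focal_loss_with_logits,
    label_smoothed_nll_loss,
    soft_dice_score,
    soft_jaccard_score,
)

logits = torch.randn(4, 1, 16, 16)                    # raw model outputs
target = torch.randint(0, 2, (4, 1, 16, 16)).float()  # binary ground truth

# The first positional argument is now named `output` instead of `input`/`y_pred`.
focal = focal_loss_with_logits(logits, target, gamma=2.0, alpha=0.25, reduction="mean")

# The soft scores expect probabilities; with dims=(0, 2, 3) they aggregate over
# batch and spatial dimensions and return one score per class channel.
dice = soft_dice_score(logits.sigmoid(), target, smooth=0.0, eps=1e-7, dims=(0, 2, 3))
iou = soft_jaccard_score(logits.sigmoid(), target, smooth=0.0, eps=1e-7, dims=(0, 2, 3))

# label_smoothed_nll_loss consumes log-probabilities, e.g. after log_softmax.
lprobs = torch.log_softmax(torch.randn(8, 5), dim=-1)
labels = torch.randint(0, 5, (8,))
smoothed = label_smoothed_nll_loss(lprobs, labels, epsilon=0.1, reduction="mean")

print(focal.item(), dice.shape, iou.shape, smoothed.item())
```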

pytorch_toolbelt/losses/jaccard.py

Lines changed: 2 additions & 2 deletions

@@ -82,12 +82,12 @@ def forward(self, y_pred: Tensor, y_true: Tensor) -> Tensor:
         y_true = y_true.view(bs, num_classes, -1)
         y_pred = y_pred.view(bs, num_classes, -1)
 
-        scores = soft_jaccard_score(y_pred, y_true.type(y_pred.dtype), self.smooth, self.eps, dims=dims)
+        scores = soft_jaccard_score(y_pred, y_true.type(y_pred.dtype), smooth=self.smooth, eps=self.eps, dims=dims)
 
         if self.log_loss:
             loss = -torch.log(scores.clamp_min(self.eps))
         else:
-            loss = 1 - scores
+            loss = 1.0 - scores
 
         # IoU loss is defined for non-empty classes
         # So we zero contribution of channel that does not have true pixels
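The change mirrors the one in dice.py above; a correspondingly hedged JaccardLoss call (again assuming the mode="binary" constructor argument, which this diff does not show):

```python
import torch

from pytorch_toolbelt.losses import JaccardLoss

criterion = JaccardLoss(mode="binary")                 # assumed constructor argument
logits = torch.randn(4, 1, 64, 64)
target = torch.randint(0, 2, (4, 1, 64, 64)).float()

print(criterion(logits, target).item())
```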

pytorch_toolbelt/losses/joint_loss.py

Lines changed: 6 additions & 1 deletion

@@ -1,3 +1,4 @@
+from torch import nn
 from torch.nn.modules.loss import _Loss
 
 __all__ = ["JointLoss", "WeightedLoss"]

@@ -18,7 +19,11 @@ def forward(self, *input):
 
 
 class JointLoss(_Loss):
-    def __init__(self, first, second, first_weight=1.0, second_weight=1.0):
+    """
+    Wrap two loss functions into one. This class computes a weighted sum of two losses.
+    """
+
+    def __init__(self, first: nn.Module, second: nn.Module, first_weight=1.0, second_weight=1.0):
         super().__init__()
         self.first = WeightedLoss(first, first_weight)
         self.second = WeightedLoss(second, second_weight)
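A brief hedged sketch of the documented wrapper. The wrapped criteria are standard torch losses chosen for illustration; only the JointLoss signature itself comes from this diff:

```python
import torch
from torch import nn

from pytorch_toolbelt.losses.joint_loss import JointLoss

# Weighted sum of two criteria: 1.0 * BCE-with-logits + 0.5 * L1.
criterion = JointLoss(nn.BCEWithLogitsLoss(), nn.L1Loss(), first_weight=1.0, second_weight=0.5)

logits = torch.randn(8, 1)
target = torch.randint(0, 2, (8, 1)).float()

print(criterion(logits, target).item())
```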
