
Commit e41b4fe

Improving ensembling
1 parent 1fc6e38 commit e41b4fe

File tree

2 files changed: +29 -53 lines changed


pytorch_toolbelt/inference/ensembling.py

Lines changed: 16 additions & 18 deletions
@@ -1,8 +1,11 @@
+import torch
 from torch import nn, Tensor
-from typing import List, Union
+from typing import List, Union, Iterable, Optional

 __all__ = ["ApplySoftmaxTo", "ApplySigmoidTo", "Ensembler", "PickModelOutput"]

+from pytorch_toolbelt.inference.tta import _deaugment_averaging
+

 class ApplySoftmaxTo(nn.Module):
     def __init__(self, model: nn.Module, output_key: Union[str, List[str]] = "logits", dim=1, temperature=1):
@@ -55,40 +58,35 @@ class Ensembler(nn.Module):
     Compute sum (or average) of outputs of several models.
     """

-    def __init__(self, models: List[nn.Module], average=True, outputs=None):
+    def __init__(self, models: List[nn.Module], reduction: str = "mean", outputs: Optional[Iterable[str]] = None):
         """

         :param models:
-        :param average:
+        :param reduction: Reduction key ('mean', 'sum', 'gmean', 'hmean' or None)
         :param outputs: Name of model outputs to average and return from Ensembler.
             If None, all outputs from the first model will be used.
         """
         super().__init__()
         self.outputs = outputs
         self.models = nn.ModuleList(models)
-        self.average = average
+        self.reduction = reduction

     def forward(self, *input, **kwargs):  # skipcq: PYL-W0221
-        output_0 = self.models[0](*input, **kwargs)
-        num_models = len(self.models)
+        outputs = [model(*input, **kwargs) for model in self.models]

         if self.outputs:
             keys = self.outputs
         else:
-            keys = output_0.keys()
-
-        for index in range(1, num_models):
-            output_i = self.models[index](*input, **kwargs)
-
-            # Sum outputs
-            for key in keys:
-                output_0[key].add_(output_i[key])
+            keys = outputs[0].keys()

-        if self.average:
-            for key in keys:
-                output_0[key].mul_(1.0 / num_models)
+        averaged_output = {}
+        for key in keys:
+            predictions = [output[key] for output in outputs]
+            predictions = torch.stack(predictions)
+            predictions = _deaugment_averaging(predictions, self.reduction)
+            averaged_output[key] = predictions

-        return output_0
+        return averaged_output


 class PickModelOutput(nn.Module):
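A usage sketch of the reworked Ensembler (editorial, not part of the commit): the dict-returning toy model and the tensor shapes are illustrative assumptions, while the constructor signature and reduction keys follow the diff above.

import torch
from torch import nn
from pytorch_toolbelt.inference.ensembling import Ensembler


class ToyModel(nn.Module):
    """Toy model that returns a dict output, which Ensembler expects."""

    def __init__(self, num_classes: int = 3):
        super().__init__()
        self.fc = nn.Linear(8, num_classes)

    def forward(self, x):
        return {"logits": self.fc(x)}


# reduction accepts 'mean', 'sum', 'gmean', 'hmean' or None (forwarded to _deaugment_averaging)
ensemble = Ensembler([ToyModel(), ToyModel()], reduction="mean")

x = torch.randn(4, 8)
out = ensemble(x)
print(out["logits"].shape)  # torch.Size([4, 3]); with reduction=None the model axis (dim 0) is kept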

pytorch_toolbelt/inference/tta.py

Lines changed: 13 additions & 35 deletions
@@ -48,7 +48,7 @@ def _deaugment_averaging(x: Tensor, reduction: MaybeStrOrCallable) -> Tensor:
     Helper method to average predictions of TTA-ed model.
     This function assumes TTA dimension is 0, e.g [T, B, C, Ci, Cj, ..]
     Args:
-        x:
+        x: Input tensor of shape [T, B, ... ]
         reduction: Reduction mode ("sum", "mean", "gmean", "hmean", function, None)

     Returns:
@@ -64,6 +64,11 @@ def _deaugment_averaging(x: Tensor, reduction: MaybeStrOrCallable) -> Tensor:
         x = F.harmonic_mean(x, dim=0)
     elif callable(reduction):
         x = reduction(x, dim=0)
+    elif reduction in {None, "None", "none"}:
+        pass
+    else:
+        raise KeyError(f"Unsupported reduction mode {reduction}")
+
     return x

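For reference, a standalone sketch of what each reduction mode computes along the TTA dimension (dim 0), written with plain torch ops rather than the library's F.geometric_mean / F.harmonic_mean helpers; the epsilon clamp is an added assumption to keep the geometric and harmonic means defined.

import torch

preds = torch.rand(4, 2, 3)  # [T, B, C]: e.g. probabilities from 4 TTA passes
eps = 1e-8                   # assumed epsilon; the library helpers may handle this differently

mean_red = preds.mean(dim=0)                                # "mean"
sum_red = preds.sum(dim=0)                                  # "sum"
gmean_red = preds.clamp_min(eps).log().mean(dim=0).exp()    # "gmean" (positive inputs)
hmean_red = 1.0 / (1.0 / preds.clamp_min(eps)).mean(dim=0)  # "hmean" (positive inputs)
identity = preds                                            # None / "none": TTA axis is kept

for reduced in (mean_red, sum_red, gmean_red, hmean_red):
    assert reduced.shape == (2, 3)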
@@ -94,10 +99,7 @@ def fivecrop_image_augment(image: Tensor, crop_size: Tuple[int, int]) -> Tensor:
     center_crop_x = (image_width - crop_width) // 2
     crop_cc = image[..., center_crop_y : center_crop_y + crop_height, center_crop_x : center_crop_x + crop_width]

-    return torch.cat(
-        [crop_tl, crop_tr, crop_bl, crop_br, crop_cc],
-        dim=0,
-    )
+    return torch.cat([crop_tl, crop_tr, crop_bl, crop_br, crop_cc], dim=0,)


 def fivecrop_label_deaugment(logits: Tensor, reduction: MaybeStrOrCallable = "mean") -> Tensor:
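A hedged sketch (not from the commit) of the five-crop TTA pairing: fivecrop_image_augment stacks the four corner crops plus the center crop along the batch dimension, and fivecrop_label_deaugment reduces back over the five crops. The toy pooling classifier and the expected output shape are assumptions.

import torch
from torch import nn
from pytorch_toolbelt.inference import tta


class GlobalPoolClassifier(nn.Module):
    """Toy classifier: global-average-pool the feature map, then a linear head."""

    def __init__(self, num_classes: int = 5):
        super().__init__()
        self.head = nn.Linear(3, num_classes)

    def forward(self, x):
        return self.head(x.mean(dim=(2, 3)))


model = GlobalPoolClassifier()
images = torch.randn(2, 3, 64, 64)

crops = tta.fivecrop_image_augment(images, crop_size=(48, 48))  # [5 * B, C, 48, 48]
logits = model(crops)                                           # [5 * B, num_classes]
deaug = tta.fivecrop_label_deaugment(logits, reduction="mean")  # expected [B, num_classes]
print(deaug.shape)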
@@ -275,15 +277,7 @@ def d2_image_augment(image: Tensor) -> Tensor:
         - Vertically-flipped tensor

     """
-    return torch.cat(
-        [
-            image,
-            F.torch_rot180(image),
-            F.torch_fliplr(image),
-            F.torch_flipud(image),
-        ],
-        dim=0,
-    )
+    return torch.cat([image, F.torch_rot180(image), F.torch_fliplr(image), F.torch_flipud(image),], dim=0,)


 def d2_image_deaugment(image: Tensor, reduction: MaybeStrOrCallable = "mean") -> Tensor:
@@ -302,12 +296,7 @@ def d2_image_deaugment(image: Tensor, reduction: MaybeStrOrCallable = "mean") -> Tensor:
     b1, b2, b3, b4 = torch.chunk(image, 4)

     image: Tensor = torch.stack(
-        [
-            b1,
-            F.torch_rot180(b2),
-            F.torch_fliplr(b3),
-            F.torch_flipud(b4),
-        ]
+        [b1, F.torch_rot180(b2), F.torch_fliplr(b3), F.torch_flipud(b4),]
     )

     return _deaugment_averaging(image, reduction=reduction)
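Similarly, a hedged round-trip sketch for the D2 helpers (identity, 180-degree rotation, horizontal and vertical flip) with a shape-preserving toy predictor; the conv layer is a stand-in for a real dense (e.g. segmentation) model.

import torch
from torch import nn
from pytorch_toolbelt.inference import tta

model = nn.Conv2d(3, 1, kernel_size=3, padding=1)  # stand-in for a dense prediction model
images = torch.randn(2, 3, 32, 32)

augmented = tta.d2_image_augment(images)                             # [4 * B, 3, 32, 32]
predictions = model(augmented)                                       # [4 * B, 1, 32, 32]
deaugmented = tta.d2_image_deaugment(predictions, reduction="mean")  # [B, 1, 32, 32]
print(deaugmented.shape)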
@@ -440,10 +429,7 @@ def flips_image_augment(image: Tensor) -> Tensor:
     return torch.cat([image, F.torch_fliplr(image), F.torch_flipud(image)], dim=0)


-def flips_image_deaugment(
-    image: Tensor,
-    reduction: MaybeStrOrCallable = "mean",
-) -> Tensor:
+def flips_image_deaugment(image: Tensor, reduction: MaybeStrOrCallable = "mean",) -> Tensor:
     """
     Deaugment input tensor (output of the model) assuming the input was flip-augmented image (See flips_augment).
     Args:
@@ -464,10 +450,7 @@ def flips_image_deaugment(
     return _deaugment_averaging(image, reduction=reduction)


-def fliplr_labels_deaugment(
-    logits: Tensor,
-    reduction: MaybeStrOrCallable = "mean",
-) -> Tensor:
+def fliplr_labels_deaugment(logits: Tensor, reduction: MaybeStrOrCallable = "mean",) -> Tensor:
     """
     Deaugment input tensor (output of the model) assuming the input was fliplr-augmented image (See fliplr_image_augment).
     Args:
@@ -485,10 +468,7 @@ def fliplr_labels_deaugment(
     return _deaugment_averaging(logits, reduction=reduction)


-def flips_labels_deaugment(
-    logits: Tensor,
-    reduction: MaybeStrOrCallable = "mean",
-) -> Tensor:
+def flips_labels_deaugment(logits: Tensor, reduction: MaybeStrOrCallable = "mean",) -> Tensor:
     """
     Deaugment input tensor (output of the model) assuming the input was flip-augmented image (See flips_image_augment).
     Args:
@@ -543,9 +523,7 @@ def ms_image_augment(


 def ms_labels_deaugment(
-    logits: List[Tensor],
-    size_offsets: List[Union[int, Tuple[int, int]]],
-    reduction: MaybeStrOrCallable = "mean",
+    logits: List[Tensor], size_offsets: List[Union[int, Tuple[int, int]]], reduction: MaybeStrOrCallable = "mean",
 ):
     """
     Deaugment logits
