Merge branch 'develop'

BloodAxe · BloodAxe · commit 89c8be3556bc · 2019-04-25T22:04:43.000+03:00
diff --git a/README.md b/README.md
@@ -8,19 +8,111 @@ A `pytorch-toolbelt` is a Python library with a set of bells and whistles for Py
 
 * Easy model building using flexible encoder-decoder architecture.
 * Modules: CoordConv, SCSE, Hypercolumn, Depthwise separable convolution and more
-* GPU-friendly test-time augmentation
+* GPU-friendly test-time augmentation (TTA) for segmentation and classification
 * GPU-friendly inference on huge (5000x5000) images
-* Every-day common routines (fix/restore random seed, filesystem utils)
-* Fancy losses: Focal, Lovasz, Jaccard and Dice losses
+* Every-day common routines (fix/restore random seed, filesystem utils, metrics)
+* Fancy losses: Focal, Lovasz, Jaccard and Dice losses, Wing Loss
 
-# Quick start
+# Why
 
-`TODO: Implement`
+The honest answer is "I needed a convenient way to re-use code for my Kaggle career". 
+During 2018 I achieved a [Kaggle Master](https://www.kaggle.com/bloodaxe) badge, and it has been a long path. 
+Very often I found myself re-using most of the old pipelines over and over again. 
+At some point it crystallized into this repository. 
+
+This lib is not meant to replace catalyst / ignite / fast.ai. Instead it's designed to complement them.
 
 # Installation
 
-`TODO: Implement`
+`pip install pytorch_toolbelt`
+
+# Showcase
+
+## Encoder-decoder models construction
+
+```python
+from pytorch_toolbelt.modules import encoders as E
+from pytorch_toolbelt.modules import decoders as D
+
+class FPNSegmentationModel(nn.Module):
+    """Segmentation model built from an encoder, an FPN decoder and a 1x1 classifier.
+
+    Args:
+        encoder: Encoder module; its ``output_filters`` configure the FPN decoder.
+        num_classes: Number of output channels of the final 1x1 convolution.
+        fpn_features: Forwarded to ``D.FPNDecoder`` as ``fpn_features``.
+    """
+
+    def __init__(self, encoder: E.EncoderModule, num_classes, fpn_features=128):
+        # nn.Module has to be initialized before any sub-module is assigned;
+        # without this call the first `self.encoder = ...` raises AttributeError.
+        super().__init__()
+        self.encoder = encoder
+        self.decoder = D.FPNDecoder(encoder.output_filters, fpn_features=fpn_features)
+        self.fuse = D.FPNFuse()
+        # The classifier consumes the fused decoder outputs, hence the summed width.
+        input_channels = sum(self.decoder.output_filters)
+        self.logits = nn.Conv2d(input_channels, num_classes, kernel_size=1)
+
+    def forward(self, input):
+        """Run encoder -> decoder -> fuse -> 1x1 convolution and return the logits."""
+        features = self.encoder(input)
+        features = self.decoder(features)
+        features = self.fuse(features)
+        logits = self.logits(features)
+        return logits
+        
+def fpn_resnext50(num_classes):
+    """Build an FPNSegmentationModel on top of a new ``E.SEResNeXt50Encoder``."""
+    return FPNSegmentationModel(E.SEResNeXt50Encoder(), num_classes)
+  
+def fpn_mobilenet(num_classes):
+    """Build an FPNSegmentationModel on top of a new ``E.MobilenetV2Encoder``."""
+    return FPNSegmentationModel(E.MobilenetV2Encoder(), num_classes)
+```
+
+## Compose multiple losses
+
+```python
+from pytorch_toolbelt import losses as L
+
+loss = L.JointLoss(L.FocalLoss(), 1.0, L.LovaszLoss(), 0.5)
+```
+
+## Test-time augmentation
+
+```python
+from pytorch_toolbelt.inference import tta
+
+# Truly functional TTA for image classification using horizontal flips:
+logits = tta.fliplr_image2label(model, input)
+
+# Truly functional TTA for image segmentation using D4 augmentation:
+logits = tta.d4_image2mask(model, input)
+
+# TTA using wrapper module:
+tta_model = tta.TTAWrapper(model, tta.fivecrop_image2label, crop_size=512)
+logits = tta_model(input)
+```
+
+## Inference on huge images:
+
+```python
+import numpy as np
+import torch
+import cv2
+
+from pytorch_toolbelt.inference.tiles import ImageSlicer, CudaTileMerger
+from pytorch_toolbelt.utils.torch_utils import tensor_from_rgb_image, to_numpy
+
+
+image = cv2.imread('really_huge_image.jpg')
+model = get_model(...)
+
+# Cut large image into overlapping tiles
+tiler = ImageSlicer(image.shape, tile_size=(512, 512), tile_step=(256, 256), weight='pyramid')
+
+# HWC -> CHW. Optionally, do normalization here
+tiles = [tensor_from_rgb_image(tile) for tile in tiler.split(image)]
+
+# Allocate a CUDA buffer for holding entire mask
+merger = CudaTileMerger(tiler.target_shape, 1, tiler.weight)
+
+# Run predictions for tiles and accumulate them
+for tiles_batch, coords_batch in DataLoader(list(zip(tiles, tiler.crops)), batch_size=8, pin_memory=True):
+    tiles_batch = tiles_batch.float().cuda()
+    pred_batch = model(tiles_batch)
 
-# Documentation
+    merger.integrate_batch(pred_batch, coords_batch)
 
-`TODO: Implement`
+# Normalize accumulated mask and convert back to numpy
+merged_mask = np.moveaxis(to_numpy(merger.merge()), 0, -1).astype(np.uint8)
+merged_mask = tiler.crop_to_orignal_size(merged_mask)
+```
diff --git a/examples/segmentation-inria/models/factory.py b/examples/segmentation-inria/models/factory.py
@@ -12,7 +12,7 @@
 from tqdm import tqdm
 
 from pytorch_toolbelt.inference.tiles import CudaTileMerger, ImageSlicer
-from pytorch_toolbelt.inference.tta import tta_fliplr_image2mask, tta_d4_image2mask
+from pytorch_toolbelt.inference.tta import fliplr_image2mask, d4_image2mask
 from pytorch_toolbelt.losses.focal import BinaryFocalLoss
 from pytorch_toolbelt.losses.jaccard import BinaryJaccardLogLoss
 from pytorch_toolbelt.losses.lovasz import BinaryLovaszLoss
@@ -94,7 +94,7 @@ def __init__(self, model):
         self.model = model
 
     def forward(self, x):
-        return tta_d4_image2mask(self.model, x)
+        return d4_image2mask(self.model, x)
 
 
 class TTAWrapperD4(nn.Module):
@@ -103,7 +103,7 @@ def __init__(self, model):
         self.model = model
 
     def forward(self, x):
-        return tta_fliplr_image2mask(self.model, x)
+        return fliplr_image2mask(self.model, x)
 
 
 def predict(model: nn.Module, image: np.ndarray, image_size, tta=None, normalize=A.Normalize(), batch_size=1, activation='sigmoid') -> np.ndarray:
diff --git a/pytorch_toolbelt/__init__.py b/pytorch_toolbelt/__init__.py
@@ -1,3 +1,3 @@
 from __future__ import absolute_import
 
-__version__ = '0.0.3'
+__version__ = '0.0.4'
diff --git a/pytorch_toolbelt/inference/tiles.py b/pytorch_toolbelt/inference/tiles.py
@@ -119,13 +119,16 @@ def __init__(self, image_shape, tile_size, tile_step=0, image_margin=0, weight='
             self.margin_top = image_margin
             self.margin_bottom = image_margin
 
-        self.crops = []
-        self.bbox_crops = []
+        crops = []
+        bbox_crops = []
 
         for y in range(0, self.image_height + self.margin_top + self.margin_bottom - self.tile_size[0] + 1, self.tile_step[0]):
             for x in range(0, self.image_width + self.margin_left + self.margin_right - self.tile_size[1] + 1, self.tile_step[1]):
-                self.crops.append((x, y, self.tile_size[1], self.tile_size[0]))
-                self.bbox_crops.append((x - self.margin_left, y - self.margin_top, self.tile_size[1], self.tile_size[0]))
+                crops.append((x, y, self.tile_size[1], self.tile_size[0]))
+                bbox_crops.append((x - self.margin_left, y - self.margin_top, self.tile_size[1], self.tile_size[0]))
+
+        self.crops = np.array(crops)
+        self.bbox_crops = np.array(bbox_crops)
 
     def split(self, image, border_type=cv2.BORDER_CONSTANT, value=0):
         assert image.shape[0] == self.image_height
diff --git a/pytorch_toolbelt/inference/tta.py b/pytorch_toolbelt/inference/tta.py
@@ -3,13 +3,17 @@
 Although this is called test-time augmentation, these methods can be used at training time as well since all
 transformations are written in PyTorch and respect gradient flow.
 """
+from functools import partial
 from typing import Tuple
 
 from torch import Tensor, nn
 from . import functional as F
 
+__all__ = ['d4_image2label', 'd4_image2mask', 'fivecrop_image2label', 'fliplr_image2mask',
+           'fliplr_image2label', 'TTAWrapper']
 
-def tta_fliplr_image2label(model: nn.Module, image: Tensor) -> Tensor:
+
+def fliplr_image2label(model: nn.Module, image: Tensor) -> Tensor:
     """Test-time augmentation for image classification that averages predictions
     for input image and horizontally flipped one.
 
@@ -22,7 +26,7 @@ def tta_fliplr_image2label(model: nn.Module, image: Tensor) -> Tensor:
     return output * one_over_2
 
 
-def tta_fivecrop_image2label(model: nn.Module, image: Tensor, crop_size: Tuple) -> Tensor:
+def fivecrop_image2label(model: nn.Module, image: Tensor, crop_size: Tuple) -> Tensor:
     """Test-time augmentation for image classification that takes five crops out of input tensor (4 on corners and central)
     and averages predictions from them.
 
@@ -66,7 +70,7 @@ def tta_fivecrop_image2label(model: nn.Module, image: Tensor, crop_size: Tuple)
     return output * one_over_5
 
 
-def tta_fliplr_image2mask(model: nn.Module, image: Tensor) -> Tensor:
+def fliplr_image2mask(model: nn.Module, image: Tensor) -> Tensor:
     """Test-time augmentation for image segmentation that averages predictions
     for input image and horizontally flipped one.
 
@@ -81,7 +85,7 @@ def tta_fliplr_image2mask(model: nn.Module, image: Tensor) -> Tensor:
     return output * one_over_2
 
 
-def tta_d4_image2label(model: nn.Module, image: Tensor) -> Tensor:
+def d4_image2label(model: nn.Module, image: Tensor) -> Tensor:
     """Test-time augmentation for image classification that averages predictions
     of all D4 augmentations applied to input image.
 
@@ -105,7 +109,7 @@ def tta_d4_image2label(model: nn.Module, image: Tensor) -> Tensor:
     return output * one_over_8
 
 
-def tta_d4_image2mask(model: nn.Module, image: Tensor) -> Tensor:
+def d4_image2mask(model: nn.Module, image: Tensor) -> Tensor:
     """Test-time augmentation for image segmentation that averages predictions
     of all D4 augmentations applied to input image.
 
@@ -129,3 +133,13 @@ def tta_d4_image2mask(model: nn.Module, image: Tensor) -> Tensor:
 
     one_over_8 = float(1.0 / 8.0)
     return output * one_over_8
+
+
+class TTAWrapper(nn.Module):
+    """Module that routes its inputs through a test-time augmentation function.
+
+    Args:
+        model: Model passed as the first argument to ``tta_function`` on every call.
+        tta_function: Callable invoked as ``tta_function(model, *input, **kwargs)``.
+        **kwargs: Keyword arguments bound to ``tta_function`` once, at construction.
+    """
+
+    def __init__(self, model, tta_function, **kwargs):
+        super().__init__()
+        self.model = model
+        # Bind the extra keyword arguments now so forward() only supplies the inputs.
+        self.tta = partial(tta_function, **kwargs)
+
+    def forward(self, *input):
+        """Call the bound TTA function with the wrapped model and the given inputs."""
+        return self.tta(self.model, *input)
diff --git a/pytorch_toolbelt/modules/decoders.py b/pytorch_toolbelt/modules/decoders.py
@@ -54,7 +54,7 @@ def forward(self, features):
 
 class FPNDecoder(DecoderModule):
     def __init__(self, features,
-                 prediction_block: nn.Module,
+                 prediction_block=FPNPredictionBlock,
                  bottleneck=FPNBottleneckBlock,
                  fpn_features=128,
                  prediction_features=128,
diff --git a/pytorch_toolbelt/optimization/functional.py b/pytorch_toolbelt/optimization/functional.py
@@ -1,5 +1,6 @@
 def get_lr_decay_parameters(parameters, learning_rate, groups: dict):
-    custom_lr_parameters = dict((group_name, {'params': [], 'lr': learning_rate * lr_factor}) for (group_name, lr_factor) in groups.items())
+    custom_lr_parameters = dict((group_name, {'params': [], 'lr': learning_rate * lr_factor})
+                                for (group_name, lr_factor) in groups.items())
     custom_lr_parameters['default'] = {'params': [], 'lr': learning_rate}
 
     for parameter_name, parameter in parameters:
diff --git a/tests/test_tiles.py b/tests/test_tiles.py
@@ -1,5 +1,7 @@
 import numpy as np
 import torch
+from torch import nn
+from torch.utils.data import DataLoader
 
 from pytorch_toolbelt.inference.tiles import ImageSlicer, CudaTileMerger
 from pytorch_toolbelt.utils.torch_utils import tensor_from_rgb_image, rgb_image_from_tensor, to_numpy
@@ -24,19 +26,33 @@ def test_tiles_split_merge_2():
     np.testing.assert_equal(merged, image)
 
 
+@torch.no_grad()
 def test_tiles_split_merge_cuda():
     if not torch.cuda.is_available():
         return
+
+    class MaxChannelIntensity(nn.Module):
+        def __init__(self):
+            super().__init__()
+
+        def forward(self, input):
+            max_channel, _ = torch.max(input, dim=1, keepdim=True)
+            return max_channel
+
     image = np.random.random((5000, 5000, 3)).astype(np.uint8)
     tiler = ImageSlicer(image.shape, tile_size=(512, 512), tile_step=(256, 256), weight='pyramid')
-    tiles = tiler.split(image)
+    tiles = [tensor_from_rgb_image(tile) for tile in tiler.split(image)]
 
-    merger = CudaTileMerger(tiler.target_shape, 3, tiler.weight)
-    for tile, coord in zip(tiles, tiler.crops):
-        batch = tensor_from_rgb_image(tile).unsqueeze(0).float().cuda()
-        merger.integrate_batch(batch, [coord])
+    model = MaxChannelIntensity().eval().cuda()
+
+    merger = CudaTileMerger(tiler.target_shape, 1, tiler.weight)
+    for tiles_batch, coords_batch in DataLoader(list(zip(tiles, tiler.crops)), batch_size=8, pin_memory=True):
+        tiles_batch = tiles_batch.float().cuda()
+        pred_batch = model(tiles_batch)
+
+        merger.integrate_batch(pred_batch, coords_batch)
 
     merged = np.moveaxis(to_numpy(merger.merge()), 0, -1).astype(np.uint8)
     merged = tiler.crop_to_orignal_size(merged)
 
-    np.testing.assert_equal(merged, image)
+    np.testing.assert_equal(merged, image.max(axis=2, keepdims=True))

Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,3 @@`
`1`	`1`	`from __future__ import absolute_import`
`2`	`2`
`3`		`-__version__ = '0.0.3'`
	`3`	`+__version__ = '0.0.4'`