This repository was archived by the owner on Oct 25, 2021. It is now read-only.

Changes from 5 commits
6 changes: 6 additions & 0 deletions .travis.yml
@@ -43,3 +43,9 @@ jobs:
    install: *requirements_dl
    script:
      - bash ./bin/tests/_check_semantic.sh

  - stage: Pipelines
    name: "Pipelines - instance segmentation"
    install: *requirements_dl
    script:
      - bash ./bin/tests/_check_instance.sh
115 changes: 115 additions & 0 deletions README.md
@@ -82,6 +82,12 @@ elif [[ "$DATASET" == "voc2012" ]]; then
    tar -xf VOCtrainval_11-May-2012.tar &>/dev/null
    mkdir -p ./data/origin/images/; mv VOCdevkit/VOC2012/JPEGImages/* $_
    mkdir -p ./data/origin/raw_masks; mv VOCdevkit/VOC2012/SegmentationClass/* $_
elif [[ "$DATASET" == "dsb2018" ]]; then
    # instance segmentation
    # https://www.kaggle.com/c/data-science-bowl-2018
    download-gdrive 1RCqaQZLziuq1Z4sbMpwD_WHjqR5cdPvh dsb2018_cleared_191109.tar.gz
    tar -xf dsb2018_cleared_191109.tar.gz &>/dev/null
    mv dsb2018_cleared_191109 ./data/origin
fi
```

@@ -97,6 +103,11 @@ fi
#### Data structure

Make sure that the final folder with data has the required structure:

<details open>
<summary>Data structure for binary segmentation</summary>
<p>

```bash
/path/to/your_dataset/
    images/
@@ -110,6 +121,66 @@ Make sure, that final folder with data has the required structure:
        ...
        mask_N
```
where each `mask` is a binary image

</p>
</details>
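
For a quick sanity check that a mask is really binary, one option is to inspect its pixel range (a minimal sketch, assuming ImageMagick is installed; `masks/mask_1.png` is a placeholder path):

```bash
# print the min and max pixel values of a mask; a binary mask should
# report exactly two levels, e.g. "0 255" (or "0 65535", depending on
# the quantum depth of your ImageMagick build)
identify -format "%[min] %[max]\n" masks/mask_1.png
```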

<details>
<summary>Data structure for semantic segmentation</summary>
<p>

```bash
/path/to/your_dataset/
    images/
        image_1
        image_2
        ...
        image_N
    raw_masks/
        mask_1
        mask_2
        ...
        mask_N
```
where each `mask` is an image whose classes are encoded by color, e.g. in the [VOC2012](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/) dataset the `bicycle` class is encoded with <span style="color:rgb(0, 128, 0)">green</span> and `bird` with <span style="color:rgb(128, 128, 0)">olive</span>

</p>
</details>
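
To see which class colors actually appear in such a mask, one option (again a sketch assuming ImageMagick is installed; `raw_masks/mask_1.png` is a placeholder path):

```bash
# print the color histogram of a mask, most frequent colors first;
# each line looks like: <count>: (r,g,b) #RRGGBB <colorname>
convert raw_masks/mask_1.png -format %c histogram:info:- | sort -rn | head
```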

<details>
<summary>Data structure for instance segmentation</summary>
<p>

```bash
/path/to/your_dataset/
    images/
        image_1
        image_2
        ...
        image_M
    raw_masks/
        mask_1/
            instance_1
            instance_2
            ...
            instance_N
        mask_2/
            instance_1
            instance_2
            ...
            instance_K
        ...
        mask_M/
            instance_1
            instance_2
            ...
            instance_Z
```
where each `mask` is represented as a folder of instance images (one image per instance), and different masks may contain different numbers of instances, e.g. the [Data Science Bowl 2018](https://www.kaggle.com/c/data-science-bowl-2018) dataset

</p>
</details>
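
To sanity-check such a layout before running the pipeline, here is a minimal sketch (it assumes each mask folder shares its image's base name; adapt the name mapping if your dataset pairs them by index, like the `image_i` / `mask_i` placeholders above):

```bash
DATASET=/path/to/your_dataset
for img in "$DATASET"/images/*; do
    name=$(basename "${img%.*}")
    dir="$DATASET/raw_masks/$name"
    # every image needs a mask folder with at least one instance image inside
    [[ -d "$dir" ]] || { echo "missing mask folder for $name"; continue; }
    [[ -n "$(ls -A "$dir")" ]] || echo "no instances in $dir"
done
```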

#### Data location

@@ -234,6 +305,50 @@ docker run -it --rm --shm-size 8G --runtime=nvidia \
</p>
</details>

<details>
<summary>Instance segmentation pipeline</summary>
<p>

#### Run in local environment:

```bash
CUDA_VISIBLE_DEVICES=0 \
CUDNN_BENCHMARK="True" \
CUDNN_DETERMINISTIC="True" \
WORKDIR=./logs \
DATADIR=./data/origin \
IMAGE_SIZE=256 \
CONFIG_TEMPLATE=./configs/templates/instance.yml \
NUM_WORKERS=4 \
BATCH_SIZE=256 \
bash ./bin/catalyst-instance-segmentation-pipeline.sh
```

#### Run in docker:

```bash
export LOGDIR=$(pwd)/logs
docker run -it --rm --shm-size 8G --runtime=nvidia \
-v $(pwd):/workspace/ \
-v $LOGDIR:/logdir/ \
-v $(pwd)/data/origin:/data \
-e "CUDA_VISIBLE_DEVICES=0" \
-e "USE_WANDB=1" \
-e "LOGDIR=/logdir" \
-e "CUDNN_BENCHMARK='True'" \
-e "CUDNN_DETERMINISTIC='True'" \
-e "WORKDIR=/logdir" \
-e "DATADIR=/data" \
-e "IMAGE_SIZE=256" \
-e "CONFIG_TEMPLATE=./configs/templates/instance.yml" \
-e "NUM_WORKERS=4" \
-e "BATCH_SIZE=256" \
catalyst-segmentation ./bin/catalyst-instance-segmentation-pipeline.sh
```

</p>
</details>

The pipeline is now running and you don’t have to do anything else; all that remains is to wait for the best model!
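
To keep an eye on a run, you can inspect the log directory the pipeline creates (a sketch: the `logdir-<date>-<postfix>` layout and `checkpoints/_metrics.json` come from the pipeline and test scripts below, while the exact checkpoint file names are an assumption):

```bash
# pick the most recent run directory created by the pipeline
RUN_LOGDIR=$(ls -td ./logs/logdir-* | head -n 1)

# checkpoints and aggregated metrics are stored under checkpoints/
ls "$RUN_LOGDIR/checkpoints/"
python -m json.tool "$RUN_LOGDIR/checkpoints/_metrics.json"
```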

#### Visualizations
136 changes: 136 additions & 0 deletions bin/catalyst-instance-segmentation-pipeline.sh
@@ -0,0 +1,136 @@
#!/usr/bin/env bash
#title :catalyst-instance-segmentation-pipeline
#description :catalyst.dl script for instance segmentation pipeline run
#author :Sergey Kolesnikov, Yauheni Kachan
#author_email :scitator@gmail.com, yauheni.kachan@gmail.com
#date :20191109
#version :19.11.1
#==============================================================================

# usage:
# WORKDIR=/path/to/logdir \
# DATADIR=/path/to/dataset \
# IMAGE_SIZE=... \
# CONFIG_TEMPLATE=... \ # model config to use
# ./bin/catalyst-instance-segmentation-pipeline.sh

# example:
# CUDA_VISIBLE_DEVICES=0 \
# CUDNN_BENCHMARK="True" \
# CUDNN_DETERMINISTIC="True" \
# WORKDIR=./logs \
# DATADIR=./data/origin \
# IMAGE_SIZE=256 \
# CONFIG_TEMPLATE=./configs/templates/instance.yml \
# NUM_WORKERS=4 \
# BATCH_SIZE=256 \
# ./bin/catalyst-instance-segmentation-pipeline.sh

set -e

# --- test part
# uncomment and run bash ./bin/catalyst-instance-segmentation-pipeline.sh

#mkdir -p ./data
#download-gdrive 1RCqaQZLziuq1Z4sbMpwD_WHjqR5cdPvh dsb2018_cleared_191109.tar.gz
#tar -xf dsb2018_cleared_191109.tar.gz &>/dev/null
#mv dsb2018_cleared_191109 ./data/origin
#
#export CUDNN_BENCHMARK="True"
#export CUDNN_DETERMINISTIC="True"
#
#export CONFIG_TEMPLATE=./configs/templates/instance.yml
#export WORKDIR=./logs
#export DATADIR=./data/origin
#export NUM_WORKERS=4
#export BATCH_SIZE=64
#export IMAGE_SIZE=256

# ---- environment variables

if [[ -z "$NUM_WORKERS" ]]; then
NUM_WORKERS=4
fi

if [[ -z "$BATCH_SIZE" ]]; then
BATCH_SIZE=64
fi

if [[ -z "$IMAGE_SIZE" ]]; then
IMAGE_SIZE=256
fi

if [[ -z "$CONFIG_TEMPLATE" ]]; then
CONFIG_TEMPLATE="./configs/templates/instance.yml"
fi

if [[ -z "$DATADIR" ]]; then
DATADIR="./data/origin"
fi

if [[ -z "$WORKDIR" ]]; then
WORKDIR="./logs"
fi

SKIPDATA=""
while getopts ":s" flag; do
case "${flag}" in
s) SKIPDATA="true" ;;
esac
done
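# e.g. pass -s to skip data preparation and reuse an already processed
# $WORKDIR/dataset from a previous run:
#   WORKDIR=./logs DATADIR=./data/origin bash ./bin/catalyst-instance-segmentation-pipeline.sh -s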

date=$(date +%y%m%d-%H%M%S)
postfix=$(openssl rand -hex 4)
logname="$date-$postfix"
export DATASET_DIR=$WORKDIR/dataset
export RAW_MASKS_DIR=$DATASET_DIR/raw_masks
export CONFIG_DIR=$WORKDIR/configs-${logname}
export LOGDIR=$WORKDIR/logdir-${logname}

mkdir -p $WORKDIR
mkdir -p $DATASET_DIR
mkdir -p $CONFIG_DIR
mkdir -p $LOGDIR

# ---- data preparation

if [[ -z "${SKIPDATA}" ]]; then
cp -R $DATADIR/* $DATASET_DIR/

mkdir -p $DATASET_DIR/masks
python scripts/process_instance_masks.py \
--in-dir $RAW_MASKS_DIR \
--out-dir $DATASET_DIR/masks \
--num-workers $NUM_WORKERS

python scripts/image2mask.py \
--in-dir $DATASET_DIR \
--out-dataset $DATASET_DIR/dataset_raw.csv

catalyst-data split-dataframe \
--in-csv $DATASET_DIR/dataset_raw.csv \
--n-folds=5 --train-folds=0,1,2,3 \
--out-csv=$DATASET_DIR/dataset.csv
fi


# ---- config preparation

python ./scripts/prepare_config.py \
    --in-template=$CONFIG_TEMPLATE \
    --out-config=$CONFIG_DIR/config.yml \
    --expdir=./src \
    --dataset-path=$DATASET_DIR \
    --num-classes=2 \
    --num-workers=$NUM_WORKERS \
    --batch-size=$BATCH_SIZE \
    --image-size=$IMAGE_SIZE

cp -r ./configs/_common.yml $CONFIG_DIR/_common.yml
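# _common.yml (presumably shared pipeline defaults) is merged with the
# generated config.yml by `catalyst-dl run -C` below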


# ---- model training

# any extra CLI flags (e.g. --check, used by the CI test) are forwarded via $*
catalyst-dl run \
    -C $CONFIG_DIR/_common.yml $CONFIG_DIR/config.yml \
    --logdir $LOGDIR $*
41 changes: 41 additions & 0 deletions bin/tests/_check_instance.sh
@@ -0,0 +1,41 @@
#!/usr/bin/env bash
set -e

mkdir -p data

download-gdrive 1RCqaQZLziuq1Z4sbMpwD_WHjqR5cdPvh dsb2018_cleared_191109.tar.gz
tar -xf dsb2018_cleared_191109.tar.gz &>/dev/null
mv dsb2018_cleared_191109 ./data/origin

# run the pipeline on CPU with a tiny batch; --check is forwarded
# through to `catalyst-dl run` for a quick sanity pass
USE_WANDB=0 \
CUDA_VISIBLE_DEVICES="" \
CUDNN_BENCHMARK="True" \
CUDNN_DETERMINISTIC="True" \
WORKDIR=./logs \
DATADIR=./data/origin \
IMAGE_SIZE=256 \
CONFIG_TEMPLATE=./configs/templates/instance.yml \
NUM_WORKERS=0 \
BATCH_SIZE=2 \
bash ./bin/catalyst-instance-segmentation-pipeline.sh --check


python -c """
import pathlib
from safitty import Safict

folder = list(pathlib.Path('./logs/').glob('logdir-*'))[0]
metrics = Safict.load(f'{folder}/checkpoints/_metrics.json')

aggregated_loss = metrics.get('best', 'loss')
iou_soft = metrics.get('best', 'iou_soft')
iou_hard = metrics.get('best', 'iou_hard')

print(aggregated_loss)
print(iou_soft)
print(iou_hard)

assert aggregated_loss < 0.9
assert iou_soft > 0.06
assert iou_hard > 0.1
"""