Merge branch 'develop' into vs/sseg_tiler_mapi

sovrasov · web-flow · commit 335262fc4c8e · 2024-09-26T20:14:31.000+02:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,12 +2,35 @@
 
 All notable changes to this project will be documented in this file.
 
-## \[2.2.0\]
+## \[2.3.0\]
 
 ### New features
 
 - Add YOLOv9 model for Object Detection
   (https://github.com/openvinotoolkit/training_extensions/pull/3917)
+- Add OV inference for keypoint detection
+  (https://github.com/openvinotoolkit/training_extensions/pull/3970)
+- Add tiling for semantic segmentation
+  (https://github.com/openvinotoolkit/training_extensions/pull/3954)
+
+### Enhancements
+
+- Upgrade OV, MAPI, and NNCF dependencies
+  (https://github.com/openvinotoolkit/training_extensions/pull/3967)
+- Instance Segmentation Model refactoring
+  (https://github.com/openvinotoolkit/training_extensions/pull/3865)
+- Bump torch and lightning to 2.4.0 versions
+  (https://github.com/openvinotoolkit/training_extensions/pull/3843)
+
+### Bug fixes
+
+- Fix a wrong HPO log
+  (https://github.com/openvinotoolkit/training_extensions/pull/3972)
+
+## \[2.2.0\]
+
+### New features
+
 - Add RT-DETR model for Object Detection
   (https://github.com/openvinotoolkit/training_extensions/pull/3741)
 - Add Multi-Label & H-label Classification with torchvision models
diff --git a/src/otx/algo/detection/heads/yolo_head.py b/src/otx/algo/detection/heads/yolo_head.py
@@ -296,8 +296,8 @@ class YOLOHeadModule(BaseDenseHead):
         csp_args (dict[str, Any], optional): Arguments for CSP blocks. Defaults to None.
         aux_cfg (dict[str, Any], optional): Configuration for auxiliary head. Defaults to None.
         with_nms (bool, optional): Whether to use NMS. Defaults to True.
-        min_confidence (float, optional): Minimum confidence for NMS. Defaults to 0.05.
-        min_iou (float, optional): Minimum IoU for NMS. Defaults to 0.9.
+        min_confidence (float, optional): Minimum confidence for NMS. Defaults to 0.1.
+        min_iou (float, optional): Minimum IoU for NMS. Defaults to 0.65.
     """
 
     def __init__(
@@ -311,8 +311,8 @@ def __init__(
         csp_args: dict[str, Any] | None = None,
         aux_cfg: dict[str, Any] | None = None,
         with_nms: bool = True,
-        min_confidence: float = 0.05,
-        min_iou: float = 0.9,
+        min_confidence: float = 0.1,
+        min_iou: float = 0.65,
     ) -> None:
         if len(csp_channels) - 1 != len(concat_sources):
             msg = (
diff --git a/src/otx/algo/detection/losses/yolov9_loss.py b/src/otx/algo/detection/losses/yolov9_loss.py
@@ -371,9 +371,9 @@ def __init__(
         loss_dfl: nn.Module | None = None,
         loss_iou: nn.Module | None = None,
         reg_max: int = 16,
-        cls_rate: float = 1.5,
-        dfl_rate: float = 7.5,
-        iou_rate: float = 0.5,
+        cls_rate: float = 0.5,
+        dfl_rate: float = 1.5,
+        iou_rate: float = 7.5,
         aux_rate: float = 0.25,
     ) -> None:
         super().__init__()
@@ -394,7 +394,7 @@ def forward(
         main_preds: tuple[Tensor, Tensor, Tensor],
         targets: Tensor,
         aux_preds: tuple[Tensor, Tensor, Tensor] | None = None,
-    ) -> dict[str, Tensor]:
+    ) -> dict[str, Tensor] | None:
         """Forward pass of the YOLOv9 criterion module.
 
         Args:
@@ -405,6 +405,10 @@ def forward(
         Returns:
             dict[str, Tensor]: The loss dictionary.
         """
+        if targets.shape[1] == 0:
+            # TODO (sungchul): should this step be done here?
+            return None
+
         main_preds = self.vec2box(main_preds)
         main_iou, main_dfl, main_cls = self._forward(main_preds, targets)
 
diff --git a/src/otx/algo/detection/rtdetr.py b/src/otx/algo/detection/rtdetr.py
@@ -135,7 +135,7 @@ def _customize_inputs(
 
     def _customize_outputs(
         self,
-        outputs: list[torch.Tensor] | dict,
+        outputs: list[torch.Tensor] | dict,  # type: ignore[override]
         inputs: DetBatchDataEntity,
     ) -> DetBatchPredEntity | OTXBatchLossEntity:
         if self.training:
diff --git a/src/otx/algo/detection/yolov9.py b/src/otx/algo/detection/yolov9.py
@@ -19,6 +19,7 @@
 from otx.core.metrics.fmeasure import MeanAveragePrecisionFMeasureCallable
 from otx.core.model.base import DefaultOptimizerCallable, DefaultSchedulerCallable
 from otx.core.model.detection import OTXDetectionModel
+from otx.core.types.export import TaskLevelExportParameters
 
 if TYPE_CHECKING:
     from lightning.pytorch.cli import LRSchedulerCallable, OptimizerCallable
@@ -116,7 +117,7 @@ def _exporter(self) -> OTXModelExporter:
             std=self.std,
             resize_mode="fit_to_window_letterbox",
             pad_value=114,
-            swap_rgb=True,
+            swap_rgb=False,
             via_onnx=True,
             onnx_export_configuration={
                 "input_names": ["image"],
@@ -135,6 +136,14 @@ def _exporter(self) -> OTXModelExporter:
             output_names=None,  # TODO (someone): support XAI
         )
 
+    @property
+    def _export_parameters(self) -> TaskLevelExportParameters:
+        """Defines parameters required to export a particular model implementation."""
+        return super()._export_parameters.wrap(
+            confidence_threshold=self.model.bbox_head.min_confidence,
+            iou_threshold=self.model.bbox_head.min_iou,
+        )
+
     def to(self, *args, **kwargs) -> Self:
         """Sync device of the model and its components."""
         ret = super().to(*args, **kwargs)
diff --git a/src/otx/core/model/base.py b/src/otx/core/model/base.py
@@ -141,9 +141,13 @@ def __init__(
         # so that it can retrieve it from the checkpoint
         self.save_hyperparameters(logger=False, ignore=["optimizer", "scheduler", "metric"])
 
-    def training_step(self, batch: T_OTXBatchDataEntity, batch_idx: int) -> Tensor:
+    def training_step(self, batch: T_OTXBatchDataEntity, batch_idx: int) -> Tensor | None:
         """Step for model training."""
         train_loss = self.forward(inputs=batch)
+        if train_loss is None:
+            # to skip current iteration
+            # TODO (sungchul): check this in distributed training
+            return None if self.trainer.world_size == 1 else torch.tensor(0.0, device=self.device)
 
         if isinstance(train_loss, Tensor):
             self.log(
diff --git a/src/otx/core/model/detection.py b/src/otx/core/model/detection.py
@@ -137,12 +137,15 @@ def _customize_inputs(
 
         return inputs
 
-    def _customize_outputs(
+    def _customize_outputs(  # type: ignore[override]
         self,
-        outputs: list[InstanceData] | dict,
+        outputs: list[InstanceData] | dict | None,
         inputs: DetBatchDataEntity,
-    ) -> DetBatchPredEntity | OTXBatchLossEntity:
+    ) -> DetBatchPredEntity | OTXBatchLossEntity | None:
         if self.training:
+            if outputs is None:
+                return outputs
+
             if not isinstance(outputs, dict):
                 raise TypeError(outputs)
 
diff --git a/src/otx/recipe/detection/yolov9_c.yaml b/src/otx/recipe/detection/yolov9_c.yaml
@@ -7,7 +7,7 @@ model:
     optimizer:
       class_path: torch.optim.SGD
       init_args:
-        lr: 0.001
+        lr: 0.0001
         momentum: 0.937
         weight_decay: 0.0005
         nesterov: true
@@ -16,13 +16,13 @@ model:
       class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
       init_args:
         num_warmup_steps: 3
-        warmup_interval: epoch
         main_scheduler_callable:
-          class_path: torch.optim.lr_scheduler.LinearLR
+          class_path: lightning.pytorch.cli.ReduceLROnPlateau
           init_args:
-            total_iters: 200
-            start_factor: 1
-            end_factor: 0.01
+            mode: max
+            factor: 0.1
+            patience: 4
+            monitor: val/map_50
 
 engine:
   task: DETECTION
@@ -42,23 +42,38 @@ overrides:
     input_size:
       - 640
       - 640
-    image_color_channel: BGR
     train_subset:
-      batch_size: 16
+      batch_size: 10
       transforms:
         - class_path: otx.core.data.transform_libs.torchvision.CachedMosaic
           init_args:
             random_pop: false
             max_cached_images: 20
             img_scale: $(input_size) # (H, W)
-        - class_path: otx.core.data.transform_libs.torchvision.RandomCrop
+        - class_path: otx.core.data.transform_libs.torchvision.RandomAffine
           init_args:
-            crop_size: $(input_size) * 0.5
+            scaling_ratio_range:
+              - 0.1
+              - 2.0
+            border: $(input_size) * -0.5
+        - class_path: otx.core.data.transform_libs.torchvision.CachedMixUp
+          init_args:
+            img_scale: $(input_size) # (H, W)
+            ratio_range:
+              - 1.0
+              - 1.0
+            prob: 0.5
+            random_pop: false
+            max_cached_images: 10
+        - class_path: otx.core.data.transform_libs.torchvision.YOLOXHSVRandomAug
         - class_path: otx.core.data.transform_libs.torchvision.Resize
           init_args:
             scale: $(input_size)
             keep_ratio: true
             transform_bbox: true
+        - class_path: otx.core.data.transform_libs.torchvision.RandomFlip
+          init_args:
+            prob: 0.5
         - class_path: otx.core.data.transform_libs.torchvision.Pad
           init_args:
             pad_to_square: true
@@ -75,7 +90,7 @@ overrides:
         class_path: otx.algo.samplers.balanced_sampler.BalancedSampler
 
     val_subset:
-      batch_size: 16
+      batch_size: 10
       transforms:
         - class_path: otx.core.data.transform_libs.torchvision.Resize
           init_args:
@@ -95,7 +110,7 @@ overrides:
             std: [255.0, 255.0, 255.0]
 
     test_subset:
-      batch_size: 16
+      batch_size: 10
       transforms:
         - class_path: otx.core.data.transform_libs.torchvision.Resize
           init_args:
diff --git a/src/otx/recipe/detection/yolov9_m.yaml b/src/otx/recipe/detection/yolov9_m.yaml
@@ -7,7 +7,7 @@ model:
     optimizer:
       class_path: torch.optim.SGD
       init_args:
-        lr: 0.001
+        lr: 0.0001
         momentum: 0.937
         weight_decay: 0.0005
         nesterov: true
@@ -42,23 +42,38 @@ overrides:
     input_size:
       - 640
       - 640
-    image_color_channel: BGR
     train_subset:
-      batch_size: 16
+      batch_size: 12
       transforms:
         - class_path: otx.core.data.transform_libs.torchvision.CachedMosaic
           init_args:
             random_pop: false
             max_cached_images: 20
             img_scale: $(input_size) # (H, W)
-        - class_path: otx.core.data.transform_libs.torchvision.RandomCrop
+        - class_path: otx.core.data.transform_libs.torchvision.RandomAffine
           init_args:
-            crop_size: $(input_size) * 0.5
+            scaling_ratio_range:
+              - 0.1
+              - 2.0
+            border: $(input_size) * -0.5
+        - class_path: otx.core.data.transform_libs.torchvision.CachedMixUp
+          init_args:
+            img_scale: $(input_size) # (H, W)
+            ratio_range:
+              - 1.0
+              - 1.0
+            prob: 0.5
+            random_pop: false
+            max_cached_images: 10
+        - class_path: otx.core.data.transform_libs.torchvision.YOLOXHSVRandomAug
         - class_path: otx.core.data.transform_libs.torchvision.Resize
           init_args:
             scale: $(input_size)
             keep_ratio: true
             transform_bbox: true
+        - class_path: otx.core.data.transform_libs.torchvision.RandomFlip
+          init_args:
+            prob: 0.5
         - class_path: otx.core.data.transform_libs.torchvision.Pad
           init_args:
             pad_to_square: true
@@ -75,7 +90,7 @@ overrides:
         class_path: otx.algo.samplers.balanced_sampler.BalancedSampler
 
     val_subset:
-      batch_size: 16
+      batch_size: 12
       transforms:
         - class_path: otx.core.data.transform_libs.torchvision.Resize
           init_args:
@@ -95,7 +110,7 @@ overrides:
             std: [255.0, 255.0, 255.0]
 
     test_subset:
-      batch_size: 16
+      batch_size: 12
       transforms:
         - class_path: otx.core.data.transform_libs.torchvision.Resize
           init_args:
diff --git a/src/otx/recipe/detection/yolov9_s.yaml b/src/otx/recipe/detection/yolov9_s.yaml
diff --git a/tests/unit/algo/detection/losses/test_yolov9_loss.py b/tests/unit/algo/detection/losses/test_yolov9_loss.py
diff --git a/tests/unit/algo/detection/test_yolov9.py b/tests/unit/algo/detection/test_yolov9.py
diff --git a/tests/unit/core/model/test_base.py b/tests/unit/core/model/test_base.py